LICENSE
MANIFEST.in
README.md
setup.py
example/outputs.json
src/alpaca_eval/__init__.py
src/alpaca_eval/analyze.py
src/alpaca_eval/completion_parsers.py
src/alpaca_eval/constants.py
src/alpaca_eval/main.py
src/alpaca_eval/metrics.py
src/alpaca_eval/plotting.py
src/alpaca_eval/processors.py
src/alpaca_eval/types.py
src/alpaca_eval/utils.py
src/alpaca_eval.egg-info/PKG-INFO
src/alpaca_eval.egg-info/SOURCES.txt
src/alpaca_eval.egg-info/dependency_links.txt
src/alpaca_eval.egg-info/entry_points.txt
src/alpaca_eval.egg-info/requires.txt
src/alpaca_eval.egg-info/top_level.txt
src/alpaca_eval/annotators/__init__.py
src/alpaca_eval/annotators/base.py
src/alpaca_eval/annotators/pairwise_evaluator.py
src/alpaca_eval/decoders/__init__.py
src/alpaca_eval/decoders/anthropic.py
src/alpaca_eval/decoders/bedrock_anthropic.py
src/alpaca_eval/decoders/cohere.py
src/alpaca_eval/decoders/google.py
src/alpaca_eval/decoders/huggingface_api.py
src/alpaca_eval/decoders/huggingface_local.py
src/alpaca_eval/decoders/jinachat.py
src/alpaca_eval/decoders/openai.py
src/alpaca_eval/decoders/replicate.py
src/alpaca_eval/decoders/vllm_local.py
src/alpaca_eval/evaluators_configs/README.md
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_cot_gpt4_turbo/alpaca_eval_clf_cot.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_cot_gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_gpt4_turbo/alpaca_eval_clf.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_clf_gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_cot_gpt4_turbo_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4/alpaca_eval.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_0314/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_0613/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/alpaca_eval_fn.txt
src/alpaca_eval/evaluators_configs/alpaca_eval_gpt4_turbo_fn/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_chat_v0_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_chat_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_cot_json_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b1_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b4_cot_json_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b5_diana_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b5_joe_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/chatml_b5_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/configs.yaml
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b1_v0_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b1_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b4_reasoning_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm/text_b5_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm_greedy_gpt4/chatml_b5_without_inputs.txt
src/alpaca_eval/evaluators_configs/alpaca_farm_greedy_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/aviary_gpt4/aviary_prompt.txt
src/alpaca_eval/evaluators_configs/aviary_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/bedrock_claude/configs.yaml
src/alpaca_eval/evaluators_configs/bedrock_claude_2/configs.yaml
src/alpaca_eval/evaluators_configs/chatgpt/basic_prompt.txt
src/alpaca_eval/evaluators_configs/chatgpt/configs.yaml
src/alpaca_eval/evaluators_configs/chatgpt_fn/basic_function_prompt.txt
src/alpaca_eval/evaluators_configs/chatgpt_fn/configs.yaml
src/alpaca_eval/evaluators_configs/claude/basic_prompt.txt
src/alpaca_eval/evaluators_configs/claude/configs.yaml
src/alpaca_eval/evaluators_configs/claude_2/configs.yaml
src/alpaca_eval/evaluators_configs/claude_ranking/configs.yaml
src/alpaca_eval/evaluators_configs/claude_ranking/ranking_prompt.txt
src/alpaca_eval/evaluators_configs/cohere/configs.yaml
src/alpaca_eval/evaluators_configs/gpt35_turbo_instruct/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_clf/basic_clf_prompt.txt
src/alpaca_eval/evaluators_configs/gpt4_turbo_clf/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_cot_clf/basic_clf_cot_prompt.txt
src/alpaca_eval/evaluators_configs/gpt4_turbo_cot_clf/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_cot_logprob/configs.yaml
src/alpaca_eval/evaluators_configs/gpt4_turbo_logprob/configs.yaml
src/alpaca_eval/evaluators_configs/guanaco_33b/basic_prompt.txt
src/alpaca_eval/evaluators_configs/guanaco_33b/configs.yaml
src/alpaca_eval/evaluators_configs/improved_aviary_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/improved_lmsys_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/lmsys_gpt4/configs.yaml
src/alpaca_eval/evaluators_configs/lmsys_gpt4/lmsys_prompt.txt
src/alpaca_eval/evaluators_configs/oasst_pythia_12b/basic_prompt.txt
src/alpaca_eval/evaluators_configs/oasst_pythia_12b/configs.yaml
src/alpaca_eval/evaluators_configs/test/configs.yaml
src/alpaca_eval/evaluators_configs/text_davinci_003/basic_prompt.txt
src/alpaca_eval/evaluators_configs/text_davinci_003/configs.yaml
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_cot_gpt4_turbo/configs.yaml
src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_gpt4_turbo/configs.yaml
src/alpaca_eval/leaderboards/data_AlpacaEval/alpaca_eval_gpt4_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval/chatgpt_fn_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval/claude_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval/text_davinci_003_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/alpaca_eval_cot_gpt4_turbo_fn_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/alpaca_eval_gpt4_turbo_fn_leaderboard.csv
src/alpaca_eval/leaderboards/data_AlpacaEval_2/weighted_alpaca_eval_gpt4_turbo_leaderboard.csv
src/alpaca_eval/leaderboards/evaluators/evaluators_leaderboard.csv
src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/configs.yaml
src/alpaca_eval/models_configs/LMCocktail-10.7B-v1/prompt.txt
src/alpaca_eval/models_configs/Mistral-7B-Instruct-v0.2/configs.yaml
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1/configs.yaml
src/alpaca_eval/models_configs/Mixtral-8x7B-Instruct-v0.1/togetherai_prompt.txt
src/alpaca_eval/models_configs/OpenHermes-2.5-Mistral-7B/configs.yaml
src/alpaca_eval/models_configs/OpenHermes-2.5-Mistral-7B/togetherai_prompt.txt
src/alpaca_eval/models_configs/Yi-34B-Chat/configs.yaml
src/alpaca_eval/models_configs/Yi-34B-Chat/prompt.txt
src/alpaca_eval/models_configs/Yi-34B-Chat-Verified/configs.yaml
src/alpaca_eval/models_configs/Yi-34B-Chat-Verified/prompt.txt
src/alpaca_eval/models_configs/airoboros-33b/configs.yaml
src/alpaca_eval/models_configs/airoboros-33b/prompt.txt
src/alpaca_eval/models_configs/airoboros-65b/configs.yaml
src/alpaca_eval/models_configs/alpaca-7b/configs.yaml
src/alpaca_eval/models_configs/alpaca-7b/prompt.txt
src/alpaca_eval/models_configs/alpaca-7b-neft/configs.yaml
src/alpaca_eval/models_configs/alpaca-7b-neft/prompt.txt
src/alpaca_eval/models_configs/alpaca-farm-ppo-human/configs.yaml
src/alpaca_eval/models_configs/alpaca-farm-ppo-sim-gpt4-20k/configs.yaml
src/alpaca_eval/models_configs/baichuan-13b-chat/configs.yaml
src/alpaca_eval/models_configs/baichuan-13b-chat/prompt.txt
src/alpaca_eval/models_configs/baize-v2-13b/configs.yaml
src/alpaca_eval/models_configs/baize-v2-13b/prompt.txt
src/alpaca_eval/models_configs/baize-v2-7b/configs.yaml
src/alpaca_eval/models_configs/bedrock_claude/configs.yaml
src/alpaca_eval/models_configs/bedrock_claude_2/configs.yaml
src/alpaca_eval/models_configs/causallm-14b/configs.yaml
src/alpaca_eval/models_configs/causallm-14b/prompt.txt
src/alpaca_eval/models_configs/chatglm2-6b/configs.yaml
src/alpaca_eval/models_configs/chatglm2-6b/prompt.txt
src/alpaca_eval/models_configs/claude/configs.yaml
src/alpaca_eval/models_configs/claude/prompt.txt
src/alpaca_eval/models_configs/claude-2/configs.yaml
src/alpaca_eval/models_configs/claude-2.1/configs.yaml
src/alpaca_eval/models_configs/claude2-alpaca-13b/configs.yaml
src/alpaca_eval/models_configs/cohere/configs.yaml
src/alpaca_eval/models_configs/cohere/prompt.txt
src/alpaca_eval/models_configs/cut-13b/configs.yaml
src/alpaca_eval/models_configs/cut-13b/prompt.txt
src/alpaca_eval/models_configs/deita-7b-v1.0/configs.yaml
src/alpaca_eval/models_configs/evo-7b/configs.yaml
src/alpaca_eval/models_configs/evo-v2-7b/configs.yaml
src/alpaca_eval/models_configs/falcon-40b-instruct/configs.yaml
src/alpaca_eval/models_configs/falcon-7b-instruct/configs.yaml
src/alpaca_eval/models_configs/gemini-pro/configs.yaml
src/alpaca_eval/models_configs/gemini-pro/prompt.txt
src/alpaca_eval/models_configs/gpt-3.5-turbo-0301/configs.yaml
src/alpaca_eval/models_configs/gpt-3.5-turbo-1106/configs.yaml
src/alpaca_eval/models_configs/gpt-3.5-turbo-16k-0613/configs.yaml
src/alpaca_eval/models_configs/gpt35_turbo_instruct/configs.yaml
src/alpaca_eval/models_configs/gpt4/chatml_prompt.txt
src/alpaca_eval/models_configs/gpt4/configs.yaml
src/alpaca_eval/models_configs/gpt4_0314/configs.yaml
src/alpaca_eval/models_configs/gpt4_0613/configs.yaml
src/alpaca_eval/models_configs/gpt4_1106_preview/configs.yaml
src/alpaca_eval/models_configs/gpt4_turbo/chatml_prompt.txt
src/alpaca_eval/models_configs/gpt4_turbo/configs.yaml
src/alpaca_eval/models_configs/guanaco-13b/configs.yaml
src/alpaca_eval/models_configs/guanaco-33b/configs.yaml
src/alpaca_eval/models_configs/guanaco-33b-api/configs.yaml
src/alpaca_eval/models_configs/guanaco-65b/configs.yaml
src/alpaca_eval/models_configs/guanaco-7b/configs.yaml
src/alpaca_eval/models_configs/guanaco-7b/prompt.txt
src/alpaca_eval/models_configs/humpback-llama-65b/configs.yaml
src/alpaca_eval/models_configs/humpback-llama-65b/prompt.txt
src/alpaca_eval/models_configs/humpback-llama2-70b/configs.yaml
src/alpaca_eval/models_configs/jina-chat/configs.yaml
src/alpaca_eval/models_configs/jina-chat/prompt.txt
src/alpaca_eval/models_configs/llama-2-13b-chat-hf/configs.yaml
src/alpaca_eval/models_configs/llama-2-70b-chat-hf/configs.yaml
src/alpaca_eval/models_configs/llama-2-70b-chat-hf/prompt.txt
src/alpaca_eval/models_configs/llama-2-7b-chat-hf/configs.yaml
src/alpaca_eval/models_configs/llama-2-7b-chat-hf/prompt.txt
src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/configs.yaml
src/alpaca_eval/models_configs/llama-2-chat-7b-evol70k-neft/prompt.txt
src/alpaca_eval/models_configs/minichat-1.5-3b/configs.yaml
src/alpaca_eval/models_configs/minichat-3b/configs.yaml
src/alpaca_eval/models_configs/minichat-3b/prompt.txt
src/alpaca_eval/models_configs/minotaur-13b/configs.yaml
src/alpaca_eval/models_configs/minotaur-13b/prompt.txt
src/alpaca_eval/models_configs/nous-hermes-13b/configs.yaml
src/alpaca_eval/models_configs/nous-hermes-13b/prompt.txt
src/alpaca_eval/models_configs/oasst-rlhf-llama-33b/configs.yaml
src/alpaca_eval/models_configs/oasst-sft-llama-33b/configs.yaml
src/alpaca_eval/models_configs/oasst-sft-llama-33b/prompt.txt
src/alpaca_eval/models_configs/oasst-sft-pythia-12b/configs.yaml
src/alpaca_eval/models_configs/oasst-sft-pythia-12b/prompt.txt
src/alpaca_eval/models_configs/openbuddy-falcon-40b-v9/configs.yaml
src/alpaca_eval/models_configs/openbuddy-falcon-40b-v9/prompt.txt
src/alpaca_eval/models_configs/openbuddy-falcon-7b-v6/configs.yaml
src/alpaca_eval/models_configs/openbuddy-falcon-7b-v6/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama-30b-v7.1/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama-30b-v7.1/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama-65b-v8/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama-65b-v8/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama2-13b-v11.1/prompt.txt
src/alpaca_eval/models_configs/openbuddy-llama2-70b-v10.1/configs.yaml
src/alpaca_eval/models_configs/openbuddy-llama2-70b-v10.1/prompt.txt
src/alpaca_eval/models_configs/openchat-13b/configs.yaml
src/alpaca_eval/models_configs/openchat-13b/prompt.txt
src/alpaca_eval/models_configs/openchat-v2-13b/configs.yaml
src/alpaca_eval/models_configs/openchat-v2-w-13b/configs.yaml
src/alpaca_eval/models_configs/openchat-v3.1-13b/configs.yaml
src/alpaca_eval/models_configs/openchat8192-13b/configs.yaml
src/alpaca_eval/models_configs/opencoderplus-15b/configs.yaml
src/alpaca_eval/models_configs/pairrm-tulu-2-13b/configs.yaml
src/alpaca_eval/models_configs/pairrm-tulu-2-70b/configs.yaml
src/alpaca_eval/models_configs/pairrm-tulu-2-70b/prompt.txt
src/alpaca_eval/models_configs/pairrm-zephyr-7b-beta/configs.yaml
src/alpaca_eval/models_configs/phi-2/configs.yaml
src/alpaca_eval/models_configs/phi-2/prompt.txt
src/alpaca_eval/models_configs/phi-2-dpo/configs.yaml
src/alpaca_eval/models_configs/phi-2-dpo/prompt.txt
src/alpaca_eval/models_configs/phi-2-sft/configs.yaml
src/alpaca_eval/models_configs/phi-2-sft/prompt.txt
src/alpaca_eval/models_configs/platolm-7b/configs.yaml
src/alpaca_eval/models_configs/platolm-7b/prompt.txt
src/alpaca_eval/models_configs/pythia-12b-mix-sft/configs.yaml
src/alpaca_eval/models_configs/recycled-wizardlm-7b-v1.0/configs.yaml
src/alpaca_eval/models_configs/recycled-wizardlm-7b-v2.0/configs.yaml
src/alpaca_eval/models_configs/text_davinci_001/configs.yaml
src/alpaca_eval/models_configs/text_davinci_003/configs.yaml
src/alpaca_eval/models_configs/text_davinci_003/prompt.txt
src/alpaca_eval/models_configs/tulu-2-dpo-13b/configs.yaml
src/alpaca_eval/models_configs/tulu-2-dpo-70b/configs.yaml
src/alpaca_eval/models_configs/tulu-2-dpo-70b/prompt.txt
src/alpaca_eval/models_configs/tulu-2-dpo-7b/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b/prompt.txt
src/alpaca_eval/models_configs/ultralm-13b-best-of-16/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b-best-of-16/prompt.txt
src/alpaca_eval/models_configs/ultralm-13b-v2.0/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b-v2.0/prompt.txt
src/alpaca_eval/models_configs/ultralm-13b-v2.0-best-of-16/configs.yaml
src/alpaca_eval/models_configs/ultralm-13b-v2.0-best-of-16/prompt.txt
src/alpaca_eval/models_configs/vicuna-13b/configs.yaml
src/alpaca_eval/models_configs/vicuna-13b-v1.3/configs.yaml
src/alpaca_eval/models_configs/vicuna-33b-v1.3/configs.yaml
src/alpaca_eval/models_configs/vicuna-7b/configs.yaml
src/alpaca_eval/models_configs/vicuna-7b/prompt.txt
src/alpaca_eval/models_configs/vicuna-7b-v1.3/configs.yaml
src/alpaca_eval/models_configs/wizardlm-13b/configs.yaml
src/alpaca_eval/models_configs/wizardlm-13b/prompt.txt
src/alpaca_eval/models_configs/wizardlm-13b-v1.1/configs.yaml
src/alpaca_eval/models_configs/wizardlm-13b-v1.2/configs.yaml
src/alpaca_eval/models_configs/xwinlm-13b-v0.1/configs.yaml
src/alpaca_eval/models_configs/xwinlm-70b-v0.1/configs.yaml
src/alpaca_eval/models_configs/xwinlm-7b-v0.1/configs.yaml
src/alpaca_eval/models_configs/xwinlm-7b-v0.1/prompt.txt
src/alpaca_eval/models_configs/zephyr-7b-alpha/configs.yaml
src/alpaca_eval/models_configs/zephyr-7b-alpha/prompt.txt
src/alpaca_eval/models_configs/zephyr-7b-beta/configs.yaml
tests/test_analyze.py
tests/test_decoders_unit.py
tests/test_main.py
tests/test_pairwise_evaluator.py