LICENSE
README.md
pyproject.toml
src/ai2_olmo_eval.egg-info/PKG-INFO
src/ai2_olmo_eval.egg-info/SOURCES.txt
src/ai2_olmo_eval.egg-info/dependency_links.txt
src/ai2_olmo_eval.egg-info/requires.txt
src/ai2_olmo_eval.egg-info/top_level.txt
src/olmo_eval/__init__.py
src/olmo_eval/metrics.py
src/olmo_eval/tasks.py
src/olmo_eval/tokenizer.py
src/olmo_eval/util.py
src/olmo_eval/version.py
src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/dataset_info.json
src/olmo_eval/hf_datasets/ai2_arc/ARC-Challenge/validation/state.json
src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/dataset_info.json
src/olmo_eval/hf_datasets/ai2_arc/ARC-Easy/validation/state.json
src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/dataset_info.json
src/olmo_eval/hf_datasets/allenai/basic_arithmetic/none/validation/state.json
src/olmo_eval/hf_datasets/boolq/none/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/boolq/none/validation/dataset_info.json
src/olmo_eval/hf_datasets/boolq/none/validation/state.json
src/olmo_eval/hf_datasets/glue/mrpc/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/glue/mrpc/validation/dataset_info.json
src/olmo_eval/hf_datasets/glue/mrpc/validation/state.json
src/olmo_eval/hf_datasets/glue/rte/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/glue/rte/validation/dataset_info.json
src/olmo_eval/hf_datasets/glue/rte/validation/state.json
src/olmo_eval/hf_datasets/glue/sst2/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/glue/sst2/validation/dataset_info.json
src/olmo_eval/hf_datasets/glue/sst2/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/abstract_algebra/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/anatomy/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/astronomy/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/business_ethics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/clinical_knowledge/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_biology/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_chemistry/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_computer_science/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_mathematics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_medicine/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/college_physics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/computer_security/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/conceptual_physics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/econometrics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/electrical_engineering/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/elementary_mathematics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/formal_logic/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/global_facts/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_biology/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_chemistry/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_computer_science/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_european_history/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_geography/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_government_and_politics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_macroeconomics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_mathematics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_microeconomics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_physics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_psychology/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_statistics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_us_history/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/high_school_world_history/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_aging/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/human_sexuality/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/international_law/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/jurisprudence/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/logical_fallacies/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/machine_learning/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/management/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/marketing/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/medical_genetics/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/miscellaneous/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_disputes/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/moral_scenarios/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/nutrition/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/philosophy/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/prehistory/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_accounting/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_law/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_medicine/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/professional_psychology/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/public_relations/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/security_studies/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/sociology/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/us_foreign_policy/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/virology/validation/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/dev/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/test/state.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/dataset_info.json
src/olmo_eval/hf_datasets/hails/mmlu_no_train/world_religions/validation/state.json
src/olmo_eval/hf_datasets/hellaswag/none/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/hellaswag/none/validation/dataset_info.json
src/olmo_eval/hf_datasets/hellaswag/none/validation/state.json
src/olmo_eval/hf_datasets/nq_open/none/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/nq_open/none/validation/dataset_info.json
src/olmo_eval/hf_datasets/nq_open/none/validation/state.json
src/olmo_eval/hf_datasets/openbookqa/main/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/openbookqa/main/validation/dataset_info.json
src/olmo_eval/hf_datasets/openbookqa/main/validation/state.json
src/olmo_eval/hf_datasets/piqa/plain_text/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/piqa/plain_text/validation/dataset_info.json
src/olmo_eval/hf_datasets/piqa/plain_text/validation/state.json
src/olmo_eval/hf_datasets/sciq/none/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/sciq/none/validation/dataset_info.json
src/olmo_eval/hf_datasets/sciq/none/validation/state.json
src/olmo_eval/hf_datasets/social_i_qa/none/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/social_i_qa/none/validation/dataset_info.json
src/olmo_eval/hf_datasets/social_i_qa/none/validation/state.json
src/olmo_eval/hf_datasets/super_glue/cb/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/super_glue/cb/validation/dataset_info.json
src/olmo_eval/hf_datasets/super_glue/cb/validation/state.json
src/olmo_eval/hf_datasets/super_glue/copa/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/super_glue/copa/validation/dataset_info.json
src/olmo_eval/hf_datasets/super_glue/copa/validation/state.json
src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/dataset_info.json
src/olmo_eval/hf_datasets/tau/commonsense_qa/none/validation/state.json
src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/dataset_info.json
src/olmo_eval/hf_datasets/trivia_qa/rc.wikipedia.nocontext/validation/state.json
src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/data-00000-of-00001.arrow
src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/dataset_info.json
src/olmo_eval/hf_datasets/winogrande/winogrande_xl/validation/state.json
src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_challenge/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/arc_challenge/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_challenge/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_challenge/test_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_challenge/test_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_challenge/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_challenge/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_easy/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/arc_easy/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_easy/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_easy/test_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_easy/test_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_easy/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/arc_easy/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/boolq/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/boolq/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/boolq/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/boolq/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/boolq/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/codex_humaneval/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/codex_mbpp/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/copa/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/copa/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/copycolors/10way/config.json
src/olmo_eval/oe_eval_tasks/copycolors/10way/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/config.json
src/olmo_eval/oe_eval_tasks/copycolors/xl_10way/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/csqa/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/csqa/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/csqa/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/csqa/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/csqa/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/config.json
src/olmo_eval/oe_eval_tasks/gsm8k/gold_bpb_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/hellaswag/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/hellaswag/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/hellaswag/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/hellaswag/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/hellaswag/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/minerva_math_algebra/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/minerva_math_counting_and_probability/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/minerva_math_geometry/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/minerva_math_intermediate_algebra/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/minerva_math_number_theory/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/minerva_math_prealgebra/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/config.json
src/olmo_eval/oe_eval_tasks/minerva_math_precalculus/gold_bpb_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/openbookqa/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/openbookqa/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/openbookqa/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/openbookqa/test_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/openbookqa/test_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/openbookqa/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/openbookqa/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/piqa/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/piqa/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/piqa/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/piqa/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/piqa/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/sciq/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/socialiqa/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/socialiqa/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/socialiqa/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/socialiqa/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/socialiqa/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/winogrande/mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/config.json
src/olmo_eval/oe_eval_tasks/winogrande/rc_0shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/winogrande/rc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/config.json
src/olmo_eval/oe_eval_tasks/winogrande/val_mc_5shot/requests.jsonl.gz
src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/config.json
src/olmo_eval/oe_eval_tasks/winogrande/val_rc_5shot/requests.jsonl.gz
src/olmo_eval/tokenizers/allenai_eleuther-ai-gpt-neox-20b-pii-special.json
src/olmo_eval/tokenizers/allenai_gpt-neox-olmo-dolma-v1_5.json