{ "mmlu_avg": 59.89175331149409, "correct": 8410, "total": 14042, "by_subject": { "abstract_algebra": { "correct": 33, "total": 100, "acc": 33.0 }, "anatomy": { "correct": 83, "total": 135, "acc": 61.48148148148148 }, "astronomy": { "correct": 108, "total": 152, "acc": 71.05263157894737 }, "business_ethics": { "correct": 68, "total": 100, "acc": 68.0 }, "clinical_knowledge": { "correct": 183, "total": 265, "acc": 69.05660377358491 }, "college_biology": { "correct": 103, "total": 144, "acc": 71.52777777777777 }, "college_chemistry": { "correct": 42, "total": 100, "acc": 42.0 }, "college_computer_science": { "correct": 55, "total": 100, "acc": 55.0 }, "college_mathematics": { "correct": 32, "total": 100, "acc": 32.0 }, "college_medicine": { "correct": 109, "total": 173, "acc": 63.005780346820806 }, "college_physics": { "correct": 51, "total": 102, "acc": 50.0 }, "computer_security": { "correct": 74, "total": 100, "acc": 74.0 }, "conceptual_physics": { "correct": 154, "total": 235, "acc": 65.53191489361703 }, "econometrics": { "correct": 36, "total": 114, "acc": 31.57894736842105 }, "electrical_engineering": { "correct": 85, "total": 145, "acc": 58.62068965517241 }, "elementary_mathematics": { "correct": 115, "total": 378, "acc": 30.423280423280424 }, "formal_logic": { "correct": 63, "total": 126, "acc": 50.0 }, "global_facts": { "correct": 32, "total": 100, "acc": 32.0 }, "high_school_biology": { "correct": 249, "total": 310, "acc": 80.3225806451613 }, "high_school_chemistry": { "correct": 118, "total": 203, "acc": 58.12807881773399 }, "high_school_computer_science": { "correct": 75, "total": 100, "acc": 75.0 }, "high_school_european_history": { "correct": 128, "total": 165, "acc": 77.57575757575758 }, "high_school_geography": { "correct": 158, "total": 198, "acc": 79.79797979797979 }, "high_school_government_and_politics": { "correct": 161, "total": 193, "acc": 83.41968911917098 }, "high_school_macroeconomics": { "correct": 258, "total": 390, "acc": 66.15384615384616 }, "high_school_mathematics": { "correct": 26, "total": 270, "acc": 9.62962962962963 }, "high_school_microeconomics": { "correct": 175, "total": 238, "acc": 73.52941176470588 }, "high_school_physics": { "correct": 71, "total": 151, "acc": 47.019867549668874 }, "high_school_psychology": { "correct": 458, "total": 545, "acc": 84.03669724770643 }, "high_school_statistics": { "correct": 110, "total": 216, "acc": 50.925925925925924 }, "high_school_us_history": { "correct": 167, "total": 204, "acc": 81.86274509803921 }, "high_school_world_history": { "correct": 185, "total": 237, "acc": 78.05907172995781 }, "human_aging": { "correct": 154, "total": 223, "acc": 69.05829596412556 }, "human_sexuality": { "correct": 94, "total": 131, "acc": 71.7557251908397 }, "international_law": { "correct": 82, "total": 121, "acc": 67.76859504132231 }, "jurisprudence": { "correct": 79, "total": 108, "acc": 73.14814814814815 }, "logical_fallacies": { "correct": 125, "total": 163, "acc": 76.68711656441718 }, "machine_learning": { "correct": 55, "total": 112, "acc": 49.107142857142854 }, "management": { "correct": 81, "total": 103, "acc": 78.64077669902913 }, "marketing": { "correct": 203, "total": 234, "acc": 86.75213675213675 }, "medical_genetics": { "correct": 75, "total": 100, "acc": 75.0 }, "miscellaneous": { "correct": 616, "total": 783, "acc": 78.67177522349937 }, "moral_disputes": { "correct": 223, "total": 346, "acc": 64.45086705202313 }, "moral_scenarios": { "correct": 217, "total": 895, "acc": 24.24581005586592 }, "nutrition": { "correct": 200, "total": 306, "acc": 65.359477124183 }, "philosophy": { "correct": 217, "total": 311, "acc": 69.7749196141479 }, "prehistory": { "correct": 230, "total": 324, "acc": 70.98765432098766 }, "professional_accounting": { "correct": 111, "total": 282, "acc": 39.361702127659576 }, "professional_law": { "correct": 598, "total": 1534, "acc": 38.983050847457626 }, "professional_medicine": { "correct": 196, "total": 272, "acc": 72.05882352941177 }, "professional_psychology": { "correct": 388, "total": 612, "acc": 63.39869281045752 }, "public_relations": { "correct": 69, "total": 110, "acc": 62.72727272727273 }, "security_studies": { "correct": 162, "total": 245, "acc": 66.12244897959184 }, "sociology": { "correct": 168, "total": 201, "acc": 83.58208955223881 }, "us_foreign_policy": { "correct": 76, "total": 100, "acc": 76.0 }, "virology": { "correct": 84, "total": 166, "acc": 50.602409638554214 }, "world_religions": { "correct": 142, "total": 171, "acc": 83.04093567251462 } } }