Stack-3.0-Omni-Nexus/benchmarks/mmlu.json
ModelHub XC 5d9af16e3d Initial commit: model provided by the ModelHub XC community
Model: my-ai-stack/Stack-3.0-Omni-Nexus
Source: Original Platform
2026-05-07 16:44:07 +08:00

292 lines · 5.9 KiB · JSON

{
  "mmlu_avg": 59.89175331149409,
  "correct": 8410,
  "total": 14042,
  "by_subject": {
    "abstract_algebra": {
      "correct": 33,
      "total": 100,
      "acc": 33.0
    },
    "anatomy": {
      "correct": 83,
      "total": 135,
      "acc": 61.48148148148148
    },
    "astronomy": {
      "correct": 108,
      "total": 152,
      "acc": 71.05263157894737
    },
    "business_ethics": {
      "correct": 68,
      "total": 100,
      "acc": 68.0
    },
    "clinical_knowledge": {
      "correct": 183,
      "total": 265,
      "acc": 69.05660377358491
    },
    "college_biology": {
      "correct": 103,
      "total": 144,
      "acc": 71.52777777777777
    },
    "college_chemistry": {
      "correct": 42,
      "total": 100,
      "acc": 42.0
    },
    "college_computer_science": {
      "correct": 55,
      "total": 100,
      "acc": 55.0
    },
    "college_mathematics": {
      "correct": 32,
      "total": 100,
      "acc": 32.0
    },
    "college_medicine": {
      "correct": 109,
      "total": 173,
      "acc": 63.005780346820806
    },
    "college_physics": {
      "correct": 51,
      "total": 102,
      "acc": 50.0
    },
    "computer_security": {
      "correct": 74,
      "total": 100,
      "acc": 74.0
    },
    "conceptual_physics": {
      "correct": 154,
      "total": 235,
      "acc": 65.53191489361703
    },
    "econometrics": {
      "correct": 36,
      "total": 114,
      "acc": 31.57894736842105
    },
    "electrical_engineering": {
      "correct": 85,
      "total": 145,
      "acc": 58.62068965517241
    },
    "elementary_mathematics": {
      "correct": 115,
      "total": 378,
      "acc": 30.423280423280424
    },
    "formal_logic": {
      "correct": 63,
      "total": 126,
      "acc": 50.0
    },
    "global_facts": {
      "correct": 32,
      "total": 100,
      "acc": 32.0
    },
    "high_school_biology": {
      "correct": 249,
      "total": 310,
      "acc": 80.3225806451613
    },
    "high_school_chemistry": {
      "correct": 118,
      "total": 203,
      "acc": 58.12807881773399
    },
    "high_school_computer_science": {
      "correct": 75,
      "total": 100,
      "acc": 75.0
    },
    "high_school_european_history": {
      "correct": 128,
      "total": 165,
      "acc": 77.57575757575758
    },
    "high_school_geography": {
      "correct": 158,
      "total": 198,
      "acc": 79.79797979797979
    },
    "high_school_government_and_politics": {
      "correct": 161,
      "total": 193,
      "acc": 83.41968911917098
    },
    "high_school_macroeconomics": {
      "correct": 258,
      "total": 390,
      "acc": 66.15384615384616
    },
    "high_school_mathematics": {
      "correct": 26,
      "total": 270,
      "acc": 9.62962962962963
    },
    "high_school_microeconomics": {
      "correct": 175,
      "total": 238,
      "acc": 73.52941176470588
    },
    "high_school_physics": {
      "correct": 71,
      "total": 151,
      "acc": 47.019867549668874
    },
    "high_school_psychology": {
      "correct": 458,
      "total": 545,
      "acc": 84.03669724770643
    },
    "high_school_statistics": {
      "correct": 110,
      "total": 216,
      "acc": 50.925925925925924
    },
    "high_school_us_history": {
      "correct": 167,
      "total": 204,
      "acc": 81.86274509803921
    },
    "high_school_world_history": {
      "correct": 185,
      "total": 237,
      "acc": 78.05907172995781
    },
    "human_aging": {
      "correct": 154,
      "total": 223,
      "acc": 69.05829596412556
    },
    "human_sexuality": {
      "correct": 94,
      "total": 131,
      "acc": 71.7557251908397
    },
    "international_law": {
      "correct": 82,
      "total": 121,
      "acc": 67.76859504132231
    },
    "jurisprudence": {
      "correct": 79,
      "total": 108,
      "acc": 73.14814814814815
    },
    "logical_fallacies": {
      "correct": 125,
      "total": 163,
      "acc": 76.68711656441718
    },
    "machine_learning": {
      "correct": 55,
      "total": 112,
      "acc": 49.107142857142854
    },
    "management": {
      "correct": 81,
      "total": 103,
      "acc": 78.64077669902913
    },
    "marketing": {
      "correct": 203,
      "total": 234,
      "acc": 86.75213675213675
    },
    "medical_genetics": {
      "correct": 75,
      "total": 100,
      "acc": 75.0
    },
    "miscellaneous": {
      "correct": 616,
      "total": 783,
      "acc": 78.67177522349937
    },
    "moral_disputes": {
      "correct": 223,
      "total": 346,
      "acc": 64.45086705202313
    },
    "moral_scenarios": {
      "correct": 217,
      "total": 895,
      "acc": 24.24581005586592
    },
    "nutrition": {
      "correct": 200,
      "total": 306,
      "acc": 65.359477124183
    },
    "philosophy": {
      "correct": 217,
      "total": 311,
      "acc": 69.7749196141479
    },
    "prehistory": {
      "correct": 230,
      "total": 324,
      "acc": 70.98765432098766
    },
    "professional_accounting": {
      "correct": 111,
      "total": 282,
      "acc": 39.361702127659576
    },
    "professional_law": {
      "correct": 598,
      "total": 1534,
      "acc": 38.983050847457626
    },
    "professional_medicine": {
      "correct": 196,
      "total": 272,
      "acc": 72.05882352941177
    },
    "professional_psychology": {
      "correct": 388,
      "total": 612,
      "acc": 63.39869281045752
    },
    "public_relations": {
      "correct": 69,
      "total": 110,
      "acc": 62.72727272727273
    },
    "security_studies": {
      "correct": 162,
      "total": 245,
      "acc": 66.12244897959184
    },
    "sociology": {
      "correct": 168,
      "total": 201,
      "acc": 83.58208955223881
    },
    "us_foreign_policy": {
      "correct": 76,
      "total": 100,
      "acc": 76.0
    },
    "virology": {
      "correct": 84,
      "total": 166,
      "acc": 50.602409638554214
    },
    "world_religions": {
      "correct": 142,
      "total": 171,
      "acc": 83.04093567251462
    }
  }
}
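
Note: the top-level "mmlu_avg" is the pooled (micro-averaged) accuracy, 100 * correct / total = 100 * 8410 / 14042 ≈ 59.89, not the mean of the per-subject "acc" values. Below is a minimal Python sketch for recomputing both aggregates from this file; the file path is the one in the header above, and nothing else in the script is shipped with the repository, so treat it as an illustrative assumption.

import json

# Path taken from the file listing above; adjust to where the file actually lives.
with open("Stack-3.0-Omni-Nexus/benchmarks/mmlu.json", encoding="utf-8") as f:
    results = json.load(f)

subjects = results["by_subject"]

# Micro-average: pool every question across subjects, then divide.
# This is the quantity stored as "mmlu_avg".
micro = 100.0 * sum(s["correct"] for s in subjects.values()) / sum(
    s["total"] for s in subjects.values()
)

# Macro-average: unweighted mean of per-subject accuracies,
# which weights small subjects (e.g. 100 questions) the same as large ones.
macro = sum(s["acc"] for s in subjects.values()) / len(subjects)

print(f"micro-average: {micro:.2f}  (file reports {results['mmlu_avg']:.2f})")
print(f"macro-average: {macro:.2f}")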