初始化项目,由ModelHub XC社区提供模型
Model: my-ai-stack/Stack-3.0-Omni-Nexus Source: Original Platform
This commit is contained in:
5
benchmarks/arc.json
Normal file
5
benchmarks/arc.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"arc_challenge_avg": 83.2764505119454,
|
||||
"correct": 976,
|
||||
"total": 1172
|
||||
}
|
||||
10562
benchmarks/gsm8k.json
Normal file
10562
benchmarks/gsm8k.json
Normal file
File diff suppressed because it is too large
Load Diff
8
benchmarks/hellaswag.json
Normal file
8
benchmarks/hellaswag.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"benchmark": "hellaswag",
|
||||
"model": "omni-nexus-alpha-q8",
|
||||
"method": "chat-api (single generate, A/B/C/D pick)",
|
||||
"accuracy": 0.5960963951404102,
|
||||
"correct": 5986,
|
||||
"total": 10042
|
||||
}
|
||||
1157
benchmarks/humaneval.json
Normal file
1157
benchmarks/humaneval.json
Normal file
File diff suppressed because it is too large
Load Diff
4009
benchmarks/mbpp.json
Normal file
4009
benchmarks/mbpp.json
Normal file
File diff suppressed because it is too large
Load Diff
292
benchmarks/mmlu.json
Normal file
292
benchmarks/mmlu.json
Normal file
@@ -0,0 +1,292 @@
|
||||
{
|
||||
"mmlu_avg": 59.89175331149409,
|
||||
"correct": 8410,
|
||||
"total": 14042,
|
||||
"by_subject": {
|
||||
"abstract_algebra": {
|
||||
"correct": 33,
|
||||
"total": 100,
|
||||
"acc": 33.0
|
||||
},
|
||||
"anatomy": {
|
||||
"correct": 83,
|
||||
"total": 135,
|
||||
"acc": 61.48148148148148
|
||||
},
|
||||
"astronomy": {
|
||||
"correct": 108,
|
||||
"total": 152,
|
||||
"acc": 71.05263157894737
|
||||
},
|
||||
"business_ethics": {
|
||||
"correct": 68,
|
||||
"total": 100,
|
||||
"acc": 68.0
|
||||
},
|
||||
"clinical_knowledge": {
|
||||
"correct": 183,
|
||||
"total": 265,
|
||||
"acc": 69.05660377358491
|
||||
},
|
||||
"college_biology": {
|
||||
"correct": 103,
|
||||
"total": 144,
|
||||
"acc": 71.52777777777777
|
||||
},
|
||||
"college_chemistry": {
|
||||
"correct": 42,
|
||||
"total": 100,
|
||||
"acc": 42.0
|
||||
},
|
||||
"college_computer_science": {
|
||||
"correct": 55,
|
||||
"total": 100,
|
||||
"acc": 55.0
|
||||
},
|
||||
"college_mathematics": {
|
||||
"correct": 32,
|
||||
"total": 100,
|
||||
"acc": 32.0
|
||||
},
|
||||
"college_medicine": {
|
||||
"correct": 109,
|
||||
"total": 173,
|
||||
"acc": 63.005780346820806
|
||||
},
|
||||
"college_physics": {
|
||||
"correct": 51,
|
||||
"total": 102,
|
||||
"acc": 50.0
|
||||
},
|
||||
"computer_security": {
|
||||
"correct": 74,
|
||||
"total": 100,
|
||||
"acc": 74.0
|
||||
},
|
||||
"conceptual_physics": {
|
||||
"correct": 154,
|
||||
"total": 235,
|
||||
"acc": 65.53191489361703
|
||||
},
|
||||
"econometrics": {
|
||||
"correct": 36,
|
||||
"total": 114,
|
||||
"acc": 31.57894736842105
|
||||
},
|
||||
"electrical_engineering": {
|
||||
"correct": 85,
|
||||
"total": 145,
|
||||
"acc": 58.62068965517241
|
||||
},
|
||||
"elementary_mathematics": {
|
||||
"correct": 115,
|
||||
"total": 378,
|
||||
"acc": 30.423280423280424
|
||||
},
|
||||
"formal_logic": {
|
||||
"correct": 63,
|
||||
"total": 126,
|
||||
"acc": 50.0
|
||||
},
|
||||
"global_facts": {
|
||||
"correct": 32,
|
||||
"total": 100,
|
||||
"acc": 32.0
|
||||
},
|
||||
"high_school_biology": {
|
||||
"correct": 249,
|
||||
"total": 310,
|
||||
"acc": 80.3225806451613
|
||||
},
|
||||
"high_school_chemistry": {
|
||||
"correct": 118,
|
||||
"total": 203,
|
||||
"acc": 58.12807881773399
|
||||
},
|
||||
"high_school_computer_science": {
|
||||
"correct": 75,
|
||||
"total": 100,
|
||||
"acc": 75.0
|
||||
},
|
||||
"high_school_european_history": {
|
||||
"correct": 128,
|
||||
"total": 165,
|
||||
"acc": 77.57575757575758
|
||||
},
|
||||
"high_school_geography": {
|
||||
"correct": 158,
|
||||
"total": 198,
|
||||
"acc": 79.79797979797979
|
||||
},
|
||||
"high_school_government_and_politics": {
|
||||
"correct": 161,
|
||||
"total": 193,
|
||||
"acc": 83.41968911917098
|
||||
},
|
||||
"high_school_macroeconomics": {
|
||||
"correct": 258,
|
||||
"total": 390,
|
||||
"acc": 66.15384615384616
|
||||
},
|
||||
"high_school_mathematics": {
|
||||
"correct": 26,
|
||||
"total": 270,
|
||||
"acc": 9.62962962962963
|
||||
},
|
||||
"high_school_microeconomics": {
|
||||
"correct": 175,
|
||||
"total": 238,
|
||||
"acc": 73.52941176470588
|
||||
},
|
||||
"high_school_physics": {
|
||||
"correct": 71,
|
||||
"total": 151,
|
||||
"acc": 47.019867549668874
|
||||
},
|
||||
"high_school_psychology": {
|
||||
"correct": 458,
|
||||
"total": 545,
|
||||
"acc": 84.03669724770643
|
||||
},
|
||||
"high_school_statistics": {
|
||||
"correct": 110,
|
||||
"total": 216,
|
||||
"acc": 50.925925925925924
|
||||
},
|
||||
"high_school_us_history": {
|
||||
"correct": 167,
|
||||
"total": 204,
|
||||
"acc": 81.86274509803921
|
||||
},
|
||||
"high_school_world_history": {
|
||||
"correct": 185,
|
||||
"total": 237,
|
||||
"acc": 78.05907172995781
|
||||
},
|
||||
"human_aging": {
|
||||
"correct": 154,
|
||||
"total": 223,
|
||||
"acc": 69.05829596412556
|
||||
},
|
||||
"human_sexuality": {
|
||||
"correct": 94,
|
||||
"total": 131,
|
||||
"acc": 71.7557251908397
|
||||
},
|
||||
"international_law": {
|
||||
"correct": 82,
|
||||
"total": 121,
|
||||
"acc": 67.76859504132231
|
||||
},
|
||||
"jurisprudence": {
|
||||
"correct": 79,
|
||||
"total": 108,
|
||||
"acc": 73.14814814814815
|
||||
},
|
||||
"logical_fallacies": {
|
||||
"correct": 125,
|
||||
"total": 163,
|
||||
"acc": 76.68711656441718
|
||||
},
|
||||
"machine_learning": {
|
||||
"correct": 55,
|
||||
"total": 112,
|
||||
"acc": 49.107142857142854
|
||||
},
|
||||
"management": {
|
||||
"correct": 81,
|
||||
"total": 103,
|
||||
"acc": 78.64077669902913
|
||||
},
|
||||
"marketing": {
|
||||
"correct": 203,
|
||||
"total": 234,
|
||||
"acc": 86.75213675213675
|
||||
},
|
||||
"medical_genetics": {
|
||||
"correct": 75,
|
||||
"total": 100,
|
||||
"acc": 75.0
|
||||
},
|
||||
"miscellaneous": {
|
||||
"correct": 616,
|
||||
"total": 783,
|
||||
"acc": 78.67177522349937
|
||||
},
|
||||
"moral_disputes": {
|
||||
"correct": 223,
|
||||
"total": 346,
|
||||
"acc": 64.45086705202313
|
||||
},
|
||||
"moral_scenarios": {
|
||||
"correct": 217,
|
||||
"total": 895,
|
||||
"acc": 24.24581005586592
|
||||
},
|
||||
"nutrition": {
|
||||
"correct": 200,
|
||||
"total": 306,
|
||||
"acc": 65.359477124183
|
||||
},
|
||||
"philosophy": {
|
||||
"correct": 217,
|
||||
"total": 311,
|
||||
"acc": 69.7749196141479
|
||||
},
|
||||
"prehistory": {
|
||||
"correct": 230,
|
||||
"total": 324,
|
||||
"acc": 70.98765432098766
|
||||
},
|
||||
"professional_accounting": {
|
||||
"correct": 111,
|
||||
"total": 282,
|
||||
"acc": 39.361702127659576
|
||||
},
|
||||
"professional_law": {
|
||||
"correct": 598,
|
||||
"total": 1534,
|
||||
"acc": 38.983050847457626
|
||||
},
|
||||
"professional_medicine": {
|
||||
"correct": 196,
|
||||
"total": 272,
|
||||
"acc": 72.05882352941177
|
||||
},
|
||||
"professional_psychology": {
|
||||
"correct": 388,
|
||||
"total": 612,
|
||||
"acc": 63.39869281045752
|
||||
},
|
||||
"public_relations": {
|
||||
"correct": 69,
|
||||
"total": 110,
|
||||
"acc": 62.72727272727273
|
||||
},
|
||||
"security_studies": {
|
||||
"correct": 162,
|
||||
"total": 245,
|
||||
"acc": 66.12244897959184
|
||||
},
|
||||
"sociology": {
|
||||
"correct": 168,
|
||||
"total": 201,
|
||||
"acc": 83.58208955223881
|
||||
},
|
||||
"us_foreign_policy": {
|
||||
"correct": 76,
|
||||
"total": 100,
|
||||
"acc": 76.0
|
||||
},
|
||||
"virology": {
|
||||
"correct": 84,
|
||||
"total": 166,
|
||||
"acc": 50.602409638554214
|
||||
},
|
||||
"world_religions": {
|
||||
"correct": 142,
|
||||
"total": 171,
|
||||
"acc": 83.04093567251462
|
||||
}
|
||||
}
|
||||
}
|
||||
5
benchmarks/truthfulqa.json
Normal file
5
benchmarks/truthfulqa.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"truthfulqa_avg": 45.04283965728274,
|
||||
"correct": 368,
|
||||
"total": 817
|
||||
}
|
||||
8
benchmarks/winogrande.json
Normal file
8
benchmarks/winogrande.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"benchmark": "winogrande",
|
||||
"model": "omni-nexus-alpha-q8",
|
||||
"method": "chat-api (fill-blank, option word count)",
|
||||
"accuracy": 0.5201262825572218,
|
||||
"correct": 659,
|
||||
"total": 1267
|
||||
}
|
||||
Reference in New Issue
Block a user