初始化项目,由ModelHub XC社区提供模型

Model: strykes/emberforge-3b-reasoner
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-30 19:09:18 +08:00
commit 7c36fbd792
28 changed files with 5552 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
task metric value
arc_challenge acc_norm,none 0.3174061433447099
boolq acc,none 0.7437308868501529
gsm8k exact_match,flexible-extract 0.6239575435936315
hellaswag acc_norm,none 0.560744871539534
mmlu acc,none 0.5997721122347244
mmlu_abstract_algebra acc,none 0.43
mmlu_anatomy acc,none 0.6074074074074074
mmlu_astronomy acc,none 0.6973684210526315
mmlu_business_ethics acc,none 0.62
mmlu_clinical_knowledge acc,none 0.6415094339622641
mmlu_college_biology acc,none 0.8263888888888888
mmlu_college_chemistry acc,none 0.53
mmlu_college_computer_science acc,none 0.54
mmlu_college_mathematics acc,none 0.5
mmlu_college_medicine acc,none 0.5953757225433526
mmlu_college_physics acc,none 0.5
mmlu_computer_security acc,none 0.68
mmlu_conceptual_physics acc,none 0.5872340425531914
mmlu_econometrics acc,none 0.35964912280701755
mmlu_electrical_engineering acc,none 0.6413793103448275
mmlu_elementary_mathematics acc,none 0.5317460317460317
mmlu_formal_logic acc,none 0.5
mmlu_global_facts acc,none 0.33
mmlu_high_school_biology acc,none 0.7548387096774194
mmlu_high_school_chemistry acc,none 0.6009852216748769
mmlu_high_school_computer_science acc,none 0.69
mmlu_high_school_european_history acc,none 0.7696969696969697
mmlu_high_school_geography acc,none 0.7272727272727273
mmlu_high_school_government_and_politics acc,none 0.7461139896373057
mmlu_high_school_macroeconomics acc,none 0.6435897435897436
mmlu_high_school_mathematics acc,none 0.45555555555555555
mmlu_high_school_microeconomics acc,none 0.7773109243697479
mmlu_high_school_physics acc,none 0.5165562913907285
mmlu_high_school_psychology acc,none 0.8
mmlu_high_school_statistics acc,none 0.5694444444444444
mmlu_high_school_us_history acc,none 0.7156862745098039
mmlu_high_school_world_history acc,none 0.7974683544303798
mmlu_human_aging acc,none 0.600896860986547
mmlu_human_sexuality acc,none 0.6946564885496184
mmlu_humanities acc,none 0.5300743889479277
mmlu_international_law acc,none 0.7851239669421488
mmlu_jurisprudence acc,none 0.7222222222222222
mmlu_logical_fallacies acc,none 0.6932515337423313
mmlu_machine_learning acc,none 0.42857142857142855
mmlu_management acc,none 0.6893203883495146
mmlu_marketing acc,none 0.8034188034188035
mmlu_medical_genetics acc,none 0.69
mmlu_miscellaneous acc,none 0.6717752234993615
mmlu_moral_disputes acc,none 0.5953757225433526
mmlu_moral_scenarios acc,none 0.2446927374301676
mmlu_nutrition acc,none 0.6764705882352942
mmlu_other acc,none 0.6269713550048278
mmlu_philosophy acc,none 0.6559485530546624
mmlu_prehistory acc,none 0.6265432098765432
mmlu_professional_accounting acc,none 0.4397163120567376
mmlu_professional_law acc,none 0.4745762711864407
mmlu_professional_medicine acc,none 0.6838235294117647
mmlu_professional_psychology acc,none 0.5915032679738562
mmlu_public_relations acc,none 0.6
mmlu_security_studies acc,none 0.7020408163265306
mmlu_social_sciences acc,none 0.6906077348066298
mmlu_sociology acc,none 0.7711442786069652
mmlu_stem acc,none 0.5883285759594037
mmlu_us_foreign_policy acc,none 0.78
mmlu_virology acc,none 0.45180722891566266
mmlu_world_religions acc,none 0.7192982456140351
piqa acc_norm,none 0.6322089227421109
truthfulqa_mc2 acc,none 0.45340473177307805
winogrande acc,none 0.500394632991318
1 task metric value
2 arc_challenge acc_norm,none 0.3174061433447099
3 boolq acc,none 0.7437308868501529
4 gsm8k exact_match,flexible-extract 0.6239575435936315
5 hellaswag acc_norm,none 0.560744871539534
6 mmlu acc,none 0.5997721122347244
7 mmlu_abstract_algebra acc,none 0.43
8 mmlu_anatomy acc,none 0.6074074074074074
9 mmlu_astronomy acc,none 0.6973684210526315
10 mmlu_business_ethics acc,none 0.62
11 mmlu_clinical_knowledge acc,none 0.6415094339622641
12 mmlu_college_biology acc,none 0.8263888888888888
13 mmlu_college_chemistry acc,none 0.53
14 mmlu_college_computer_science acc,none 0.54
15 mmlu_college_mathematics acc,none 0.5
16 mmlu_college_medicine acc,none 0.5953757225433526
17 mmlu_college_physics acc,none 0.5
18 mmlu_computer_security acc,none 0.68
19 mmlu_conceptual_physics acc,none 0.5872340425531914
20 mmlu_econometrics acc,none 0.35964912280701755
21 mmlu_electrical_engineering acc,none 0.6413793103448275
22 mmlu_elementary_mathematics acc,none 0.5317460317460317
23 mmlu_formal_logic acc,none 0.5
24 mmlu_global_facts acc,none 0.33
25 mmlu_high_school_biology acc,none 0.7548387096774194
26 mmlu_high_school_chemistry acc,none 0.6009852216748769
27 mmlu_high_school_computer_science acc,none 0.69
28 mmlu_high_school_european_history acc,none 0.7696969696969697
29 mmlu_high_school_geography acc,none 0.7272727272727273
30 mmlu_high_school_government_and_politics acc,none 0.7461139896373057
31 mmlu_high_school_macroeconomics acc,none 0.6435897435897436
32 mmlu_high_school_mathematics acc,none 0.45555555555555555
33 mmlu_high_school_microeconomics acc,none 0.7773109243697479
34 mmlu_high_school_physics acc,none 0.5165562913907285
35 mmlu_high_school_psychology acc,none 0.8
36 mmlu_high_school_statistics acc,none 0.5694444444444444
37 mmlu_high_school_us_history acc,none 0.7156862745098039
38 mmlu_high_school_world_history acc,none 0.7974683544303798
39 mmlu_human_aging acc,none 0.600896860986547
40 mmlu_human_sexuality acc,none 0.6946564885496184
41 mmlu_humanities acc,none 0.5300743889479277
42 mmlu_international_law acc,none 0.7851239669421488
43 mmlu_jurisprudence acc,none 0.7222222222222222
44 mmlu_logical_fallacies acc,none 0.6932515337423313
45 mmlu_machine_learning acc,none 0.42857142857142855
46 mmlu_management acc,none 0.6893203883495146
47 mmlu_marketing acc,none 0.8034188034188035
48 mmlu_medical_genetics acc,none 0.69
49 mmlu_miscellaneous acc,none 0.6717752234993615
50 mmlu_moral_disputes acc,none 0.5953757225433526
51 mmlu_moral_scenarios acc,none 0.2446927374301676
52 mmlu_nutrition acc,none 0.6764705882352942
53 mmlu_other acc,none 0.6269713550048278
54 mmlu_philosophy acc,none 0.6559485530546624
55 mmlu_prehistory acc,none 0.6265432098765432
56 mmlu_professional_accounting acc,none 0.4397163120567376
57 mmlu_professional_law acc,none 0.4745762711864407
58 mmlu_professional_medicine acc,none 0.6838235294117647
59 mmlu_professional_psychology acc,none 0.5915032679738562
60 mmlu_public_relations acc,none 0.6
61 mmlu_security_studies acc,none 0.7020408163265306
62 mmlu_social_sciences acc,none 0.6906077348066298
63 mmlu_sociology acc,none 0.7711442786069652
64 mmlu_stem acc,none 0.5883285759594037
65 mmlu_us_foreign_policy acc,none 0.78
66 mmlu_virology acc,none 0.45180722891566266
67 mmlu_world_religions acc,none 0.7192982456140351
68 piqa acc_norm,none 0.6322089227421109
69 truthfulqa_mc2 acc,none 0.45340473177307805
70 winogrande acc,none 0.500394632991318