Files
ModelHub XC 7c36fbd792 初始化项目,由ModelHub XC社区提供模型
Model: strykes/emberforge-3b-reasoner
Source: Original Platform
2026-05-30 19:09:18 +08:00

3.2 KiB

1taskmetricvalue
2arc_challengeacc_norm,none0.3174061433447099
3boolqacc,none0.7437308868501529
4gsm8kexact_match,flexible-extract0.6239575435936315
5hellaswagacc_norm,none0.560744871539534
6mmluacc,none0.5997721122347244
7mmlu_abstract_algebraacc,none0.43
8mmlu_anatomyacc,none0.6074074074074074
9mmlu_astronomyacc,none0.6973684210526315
10mmlu_business_ethicsacc,none0.62
11mmlu_clinical_knowledgeacc,none0.6415094339622641
12mmlu_college_biologyacc,none0.8263888888888888
13mmlu_college_chemistryacc,none0.53
14mmlu_college_computer_scienceacc,none0.54
15mmlu_college_mathematicsacc,none0.5
16mmlu_college_medicineacc,none0.5953757225433526
17mmlu_college_physicsacc,none0.5
18mmlu_computer_securityacc,none0.68
19mmlu_conceptual_physicsacc,none0.5872340425531914
20mmlu_econometricsacc,none0.35964912280701755
21mmlu_electrical_engineeringacc,none0.6413793103448275
22mmlu_elementary_mathematicsacc,none0.5317460317460317
23mmlu_formal_logicacc,none0.5
24mmlu_global_factsacc,none0.33
25mmlu_high_school_biologyacc,none0.7548387096774194
26mmlu_high_school_chemistryacc,none0.6009852216748769
27mmlu_high_school_computer_scienceacc,none0.69
28mmlu_high_school_european_historyacc,none0.7696969696969697
29mmlu_high_school_geographyacc,none0.7272727272727273
30mmlu_high_school_government_and_politicsacc,none0.7461139896373057
31mmlu_high_school_macroeconomicsacc,none0.6435897435897436
32mmlu_high_school_mathematicsacc,none0.45555555555555555
33mmlu_high_school_microeconomicsacc,none0.7773109243697479
34mmlu_high_school_physicsacc,none0.5165562913907285
35mmlu_high_school_psychologyacc,none0.8
36mmlu_high_school_statisticsacc,none0.5694444444444444
37mmlu_high_school_us_historyacc,none0.7156862745098039
38mmlu_high_school_world_historyacc,none0.7974683544303798
39mmlu_human_agingacc,none0.600896860986547
40mmlu_human_sexualityacc,none0.6946564885496184
41mmlu_humanitiesacc,none0.5300743889479277
42mmlu_international_lawacc,none0.7851239669421488
43mmlu_jurisprudenceacc,none0.7222222222222222
44mmlu_logical_fallaciesacc,none0.6932515337423313
45mmlu_machine_learningacc,none0.42857142857142855
46mmlu_managementacc,none0.6893203883495146
47mmlu_marketingacc,none0.8034188034188035
48mmlu_medical_geneticsacc,none0.69
49mmlu_miscellaneousacc,none0.6717752234993615
50mmlu_moral_disputesacc,none0.5953757225433526
51mmlu_moral_scenariosacc,none0.2446927374301676
52mmlu_nutritionacc,none0.6764705882352942
53mmlu_otheracc,none0.6269713550048278
54mmlu_philosophyacc,none0.6559485530546624
55mmlu_prehistoryacc,none0.6265432098765432
56mmlu_professional_accountingacc,none0.4397163120567376
57mmlu_professional_lawacc,none0.4745762711864407
58mmlu_professional_medicineacc,none0.6838235294117647
59mmlu_professional_psychologyacc,none0.5915032679738562
60mmlu_public_relationsacc,none0.6
61mmlu_security_studiesacc,none0.7020408163265306
62mmlu_social_sciencesacc,none0.6906077348066298
63mmlu_sociologyacc,none0.7711442786069652
64mmlu_stemacc,none0.5883285759594037
65mmlu_us_foreign_policyacc,none0.78
66mmlu_virologyacc,none0.45180722891566266
67mmlu_world_religionsacc,none0.7192982456140351
68piqaacc_norm,none0.6322089227421109
69truthfulqa_mc2acc,none0.45340473177307805
70winograndeacc,none0.500394632991318