初始化项目,由ModelHub XC社区提供模型

Model: bigscience/bloomz-7b1-p3
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-15 07:40:14 +08:00
commit 78a6661ff1
634 changed files with 7477 additions and 0 deletions

View File

@@ -0,0 +1,86 @@
dataset,prompt,metric,value
xcopa_id,C1 or C2? premise_idmt,accuracy,0.51
xcopa_id,best_option_idmt,accuracy,0.53
xcopa_id,cause_effect_idmt,accuracy,0.69
xcopa_id,i_am_hesitating_idmt,accuracy,0.64
xcopa_id,plausible_alternatives_idmt,accuracy,0.7
xcopa_id,median,accuracy,0.64
xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6
xcopa_sw,best_option_swmt,accuracy,0.62
xcopa_sw,cause_effect_swmt,accuracy,0.49
xcopa_sw,i_am_hesitating_swmt,accuracy,0.56
xcopa_sw,plausible_alternatives_swmt,accuracy,0.54
xcopa_sw,median,accuracy,0.56
xcopa_ta,C1 or C2? premise_tamt,accuracy,0.52
xcopa_ta,best_option_tamt,accuracy,0.55
xcopa_ta,cause_effect_tamt,accuracy,0.63
xcopa_ta,i_am_hesitating_tamt,accuracy,0.63
xcopa_ta,plausible_alternatives_tamt,accuracy,0.66
xcopa_ta,median,accuracy,0.63
xcopa_vi,C1 or C2? premise_vimt,accuracy,0.55
xcopa_vi,best_option_vimt,accuracy,0.61
xcopa_vi,cause_effect_vimt,accuracy,0.64
xcopa_vi,i_am_hesitating_vimt,accuracy,0.6
xcopa_vi,plausible_alternatives_vimt,accuracy,0.64
xcopa_vi,median,accuracy,0.61
xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.52
xcopa_zh,best_option_zhmt,accuracy,0.61
xcopa_zh,cause_effect_zhmt,accuracy,0.75
xcopa_zh,i_am_hesitating_zhmt,accuracy,0.72
xcopa_zh,plausible_alternatives_zhmt,accuracy,0.76
xcopa_zh,median,accuracy,0.72
xstory_cloze_ar,Answer Given options_armt,accuracy,0.7061548643282595
xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.786896095301125
xstory_cloze_ar,Generate Ending_armt,accuracy,0.600926538716082
xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.7511581733951026
xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.757114493712773
xstory_cloze_ar,median,accuracy,0.7511581733951026
xstory_cloze_es,Answer Given options_esmt,accuracy,0.7902051621442753
xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.8160158835208471
xstory_cloze_es,Generate Ending_esmt,accuracy,0.657180675049636
xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.784910655195235
xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.7696889477167439
xstory_cloze_es,median,accuracy,0.784910655195235
xstory_cloze_eu,Answer Given options_eumt,accuracy,0.6227663798808736
xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.6763732627399074
xstory_cloze_eu,Generate Ending_eumt,accuracy,0.5737921906022502
xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.686300463269358
xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.6637988087359364
xstory_cloze_eu,median,accuracy,0.6637988087359364
xstory_cloze_hi,Answer Given options_himt,accuracy,0.6697551290536069
xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.7160820648577101
xstory_cloze_hi,Generate Ending_himt,accuracy,0.5923229649238915
xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.6882859033752482
xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.7048312375909993
xstory_cloze_hi,median,accuracy,0.6882859033752482
xstory_cloze_id,Answer Given options_idmt,accuracy,0.7346128391793514
xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.7511581733951026
xstory_cloze_id,Generate Ending_idmt,accuracy,0.6201191264063534
xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.728656518861681
xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.7412309728656519
xstory_cloze_id,median,accuracy,0.7346128391793514
xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.7425545996029119
xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.7941760423560555
xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.6247518199867638
xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.7842488418266049
xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.8034414295168762
xstory_cloze_zh,median,accuracy,0.7842488418266049
xwinograd_fr,Replace_frmt,accuracy,0.5180722891566265
xwinograd_fr,True or False_frmt,accuracy,0.46987951807228917
xwinograd_fr,does underscore refer to_frmt,accuracy,0.5421686746987951
xwinograd_fr,stand for_frmt,accuracy,0.5060240963855421
xwinograd_fr,underscore refer to_frmt,accuracy,0.5421686746987951
xwinograd_fr,median,accuracy,0.5180722891566265
xwinograd_pt,Replace_ptmt,accuracy,0.5057034220532319
xwinograd_pt,True or False_ptmt,accuracy,0.5133079847908745
xwinograd_pt,does underscore refer to_ptmt,accuracy,0.5209125475285171
xwinograd_pt,stand for_ptmt,accuracy,0.5209125475285171
xwinograd_pt,underscore refer to_ptmt,accuracy,0.49049429657794674
xwinograd_pt,median,accuracy,0.5133079847908745
xwinograd_zh,Replace_zhmt,accuracy,0.5238095238095238
xwinograd_zh,True or False_zhmt,accuracy,0.5138888888888888
xwinograd_zh,does underscore refer to_zhmt,accuracy,0.49404761904761907
xwinograd_zh,stand for_zhmt,accuracy,0.49603174603174605
xwinograd_zh,underscore refer to_zhmt,accuracy,0.503968253968254
xwinograd_zh,median,accuracy,0.503968253968254
multiple,average,multiple,0.6501688392588024
1 dataset prompt metric value
2 xcopa_id C1 or C2? premise_idmt accuracy 0.51
3 xcopa_id best_option_idmt accuracy 0.53
4 xcopa_id cause_effect_idmt accuracy 0.69
5 xcopa_id i_am_hesitating_idmt accuracy 0.64
6 xcopa_id plausible_alternatives_idmt accuracy 0.7
7 xcopa_id median accuracy 0.64
8 xcopa_sw C1 or C2? premise_swmt accuracy 0.6
9 xcopa_sw best_option_swmt accuracy 0.62
10 xcopa_sw cause_effect_swmt accuracy 0.49
11 xcopa_sw i_am_hesitating_swmt accuracy 0.56
12 xcopa_sw plausible_alternatives_swmt accuracy 0.54
13 xcopa_sw median accuracy 0.56
14 xcopa_ta C1 or C2? premise_tamt accuracy 0.52
15 xcopa_ta best_option_tamt accuracy 0.55
16 xcopa_ta cause_effect_tamt accuracy 0.63
17 xcopa_ta i_am_hesitating_tamt accuracy 0.63
18 xcopa_ta plausible_alternatives_tamt accuracy 0.66
19 xcopa_ta median accuracy 0.63
20 xcopa_vi C1 or C2? premise_vimt accuracy 0.55
21 xcopa_vi best_option_vimt accuracy 0.61
22 xcopa_vi cause_effect_vimt accuracy 0.64
23 xcopa_vi i_am_hesitating_vimt accuracy 0.6
24 xcopa_vi plausible_alternatives_vimt accuracy 0.64
25 xcopa_vi median accuracy 0.61
26 xcopa_zh C1 or C2? premise_zhmt accuracy 0.52
27 xcopa_zh best_option_zhmt accuracy 0.61
28 xcopa_zh cause_effect_zhmt accuracy 0.75
29 xcopa_zh i_am_hesitating_zhmt accuracy 0.72
30 xcopa_zh plausible_alternatives_zhmt accuracy 0.76
31 xcopa_zh median accuracy 0.72
32 xstory_cloze_ar Answer Given options_armt accuracy 0.7061548643282595
33 xstory_cloze_ar Choose Story Ending_armt accuracy 0.786896095301125
34 xstory_cloze_ar Generate Ending_armt accuracy 0.600926538716082
35 xstory_cloze_ar Novel Correct Ending_armt accuracy 0.7511581733951026
36 xstory_cloze_ar Story Continuation and Options_armt accuracy 0.757114493712773
37 xstory_cloze_ar median accuracy 0.7511581733951026
38 xstory_cloze_es Answer Given options_esmt accuracy 0.7902051621442753
39 xstory_cloze_es Choose Story Ending_esmt accuracy 0.8160158835208471
40 xstory_cloze_es Generate Ending_esmt accuracy 0.657180675049636
41 xstory_cloze_es Novel Correct Ending_esmt accuracy 0.784910655195235
42 xstory_cloze_es Story Continuation and Options_esmt accuracy 0.7696889477167439
43 xstory_cloze_es median accuracy 0.784910655195235
44 xstory_cloze_eu Answer Given options_eumt accuracy 0.6227663798808736
45 xstory_cloze_eu Choose Story Ending_eumt accuracy 0.6763732627399074
46 xstory_cloze_eu Generate Ending_eumt accuracy 0.5737921906022502
47 xstory_cloze_eu Novel Correct Ending_eumt accuracy 0.686300463269358
48 xstory_cloze_eu Story Continuation and Options_eumt accuracy 0.6637988087359364
49 xstory_cloze_eu median accuracy 0.6637988087359364
50 xstory_cloze_hi Answer Given options_himt accuracy 0.6697551290536069
51 xstory_cloze_hi Choose Story Ending_himt accuracy 0.7160820648577101
52 xstory_cloze_hi Generate Ending_himt accuracy 0.5923229649238915
53 xstory_cloze_hi Novel Correct Ending_himt accuracy 0.6882859033752482
54 xstory_cloze_hi Story Continuation and Options_himt accuracy 0.7048312375909993
55 xstory_cloze_hi median accuracy 0.6882859033752482
56 xstory_cloze_id Answer Given options_idmt accuracy 0.7346128391793514
57 xstory_cloze_id Choose Story Ending_idmt accuracy 0.7511581733951026
58 xstory_cloze_id Generate Ending_idmt accuracy 0.6201191264063534
59 xstory_cloze_id Novel Correct Ending_idmt accuracy 0.728656518861681
60 xstory_cloze_id Story Continuation and Options_idmt accuracy 0.7412309728656519
61 xstory_cloze_id median accuracy 0.7346128391793514
62 xstory_cloze_zh Answer Given options_zhmt accuracy 0.7425545996029119
63 xstory_cloze_zh Choose Story Ending_zhmt accuracy 0.7941760423560555
64 xstory_cloze_zh Generate Ending_zhmt accuracy 0.6247518199867638
65 xstory_cloze_zh Novel Correct Ending_zhmt accuracy 0.7842488418266049
66 xstory_cloze_zh Story Continuation and Options_zhmt accuracy 0.8034414295168762
67 xstory_cloze_zh median accuracy 0.7842488418266049
68 xwinograd_fr Replace_frmt accuracy 0.5180722891566265
69 xwinograd_fr True or False_frmt accuracy 0.46987951807228917
70 xwinograd_fr does underscore refer to_frmt accuracy 0.5421686746987951
71 xwinograd_fr stand for_frmt accuracy 0.5060240963855421
72 xwinograd_fr underscore refer to_frmt accuracy 0.5421686746987951
73 xwinograd_fr median accuracy 0.5180722891566265
74 xwinograd_pt Replace_ptmt accuracy 0.5057034220532319
75 xwinograd_pt True or False_ptmt accuracy 0.5133079847908745
76 xwinograd_pt does underscore refer to_ptmt accuracy 0.5209125475285171
77 xwinograd_pt stand for_ptmt accuracy 0.5209125475285171
78 xwinograd_pt underscore refer to_ptmt accuracy 0.49049429657794674
79 xwinograd_pt median accuracy 0.5133079847908745
80 xwinograd_zh Replace_zhmt accuracy 0.5238095238095238
81 xwinograd_zh True or False_zhmt accuracy 0.5138888888888888
82 xwinograd_zh does underscore refer to_zhmt accuracy 0.49404761904761907
83 xwinograd_zh stand for_zhmt accuracy 0.49603174603174605
84 xwinograd_zh underscore refer to_zhmt accuracy 0.503968253968254
85 xwinograd_zh median accuracy 0.503968253968254
86 multiple average multiple 0.6501688392588024