初始化项目,由ModelHub XC社区提供模型
Model: bigscience/bloomz-7b1-p3 Source: Original Platform
This commit is contained in:
50
evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/merged.csv
Normal file
50
evaluation_bloomz-7b1-p3/evaluation_xnliht/xnli/merged.csv
Normal file
@@ -0,0 +1,50 @@
|
||||
dataset,prompt,metric,value
|
||||
xnli_ar,GPT-3 style_arht,accuracy,0.3634538152610442
|
||||
xnli_ar,MNLI crowdsource_arht,accuracy,0.3437751004016064
|
||||
xnli_ar,can we infer_arht,accuracy,0.40923694779116465
|
||||
xnli_ar,guaranteed/possible/impossible_arht,accuracy,0.342570281124498
|
||||
xnli_ar,justified in saying_arht,accuracy,0.4321285140562249
|
||||
xnli_ar,median,accuracy,0.3634538152610442
|
||||
xnli_es,GPT-3 style_esht,accuracy,0.4795180722891566
|
||||
xnli_es,MNLI crowdsource_esht,accuracy,0.3333333333333333
|
||||
xnli_es,can we infer_esht,accuracy,0.3333333333333333
|
||||
xnli_es,guaranteed/possible/impossible_esht,accuracy,0.529718875502008
|
||||
xnli_es,justified in saying_esht,accuracy,0.3333333333333333
|
||||
xnli_es,median,accuracy,0.3333333333333333
|
||||
xnli_fr,GPT-3 style_frht,accuracy,0.45863453815261046
|
||||
xnli_fr,MNLI crowdsource_frht,accuracy,0.42730923694779116
|
||||
xnli_fr,can we infer_frht,accuracy,0.40963855421686746
|
||||
xnli_fr,guaranteed/possible/impossible_frht,accuracy,0.37309236947791163
|
||||
xnli_fr,justified in saying_frht,accuracy,0.4710843373493976
|
||||
xnli_fr,median,accuracy,0.42730923694779116
|
||||
xnli_hi,GPT-3 style_hiht,accuracy,0.3542168674698795
|
||||
xnli_hi,MNLI crowdsource_hiht,accuracy,0.4389558232931727
|
||||
xnli_hi,can we infer_hiht,accuracy,0.41566265060240964
|
||||
xnli_hi,guaranteed/possible/impossible_hiht,accuracy,0.4927710843373494
|
||||
xnli_hi,justified in saying_hiht,accuracy,0.4562248995983936
|
||||
xnli_hi,median,accuracy,0.4389558232931727
|
||||
xnli_sw,GPT-3 style_swht,accuracy,0.3389558232931727
|
||||
xnli_sw,MNLI crowdsource_swht,accuracy,0.3257028112449799
|
||||
xnli_sw,can we infer_swht,accuracy,0.3429718875502008
|
||||
xnli_sw,guaranteed/possible/impossible_swht,accuracy,0.3718875502008032
|
||||
xnli_sw,justified in saying_swht,accuracy,0.3409638554216867
|
||||
xnli_sw,median,accuracy,0.3409638554216867
|
||||
xnli_ur,GPT-3 style_urht,accuracy,0.3646586345381526
|
||||
xnli_ur,MNLI crowdsource_urht,accuracy,0.3538152610441767
|
||||
xnli_ur,can we infer_urht,accuracy,0.3610441767068273
|
||||
xnli_ur,guaranteed/possible/impossible_urht,accuracy,0.37670682730923694
|
||||
xnli_ur,justified in saying_urht,accuracy,0.3377510040160643
|
||||
xnli_ur,median,accuracy,0.3610441767068273
|
||||
xnli_vi,GPT-3 style_viht,accuracy,0.3357429718875502
|
||||
xnli_vi,MNLI crowdsource_viht,accuracy,0.3477911646586345
|
||||
xnli_vi,can we infer_viht,accuracy,0.3333333333333333
|
||||
xnli_vi,guaranteed/possible/impossible_viht,accuracy,0.39759036144578314
|
||||
xnli_vi,justified in saying_viht,accuracy,0.3333333333333333
|
||||
xnli_vi,median,accuracy,0.3357429718875502
|
||||
xnli_zh,GPT-3 style_zhht,accuracy,0.348995983935743
|
||||
xnli_zh,MNLI crowdsource_zhht,accuracy,0.45100401606425705
|
||||
xnli_zh,can we infer_zhht,accuracy,0.3931726907630522
|
||||
xnli_zh,guaranteed/possible/impossible_zhht,accuracy,0.39879518072289155
|
||||
xnli_zh,justified in saying_zhht,accuracy,0.3827309236947791
|
||||
xnli_zh,median,accuracy,0.3931726907630522
|
||||
multiple,average,multiple,0.3742469879518072
|
||||
|
Reference in New Issue
Block a user