初始化项目,由ModelHub XC社区提供模型

Model: bigscience/bloomz-7b1-p3
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-15 07:40:14 +08:00
commit 78a6661ff1
634 changed files with 7477 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ar",
"template_name": "GPT-3 style_arht",
"evaluation": {
"accuracy": 0.3634538152610442
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ar",
"template_name": "MNLI crowdsource_arht",
"evaluation": {
"accuracy": 0.3437751004016064
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='MNLI crowdsource_arht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ar",
"template_name": "can we infer_arht",
"evaluation": {
"accuracy": 0.40923694779116465
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='can we infer_arht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ar",
"template_name": "guaranteed/possible/impossible_arht",
"evaluation": {
"accuracy": 0.342570281124498
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='guaranteed/possible/impossible_arht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ar",
"template_name": "justified in saying_arht",
"evaluation": {
"accuracy": 0.4321285140562249
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "es",
"template_name": "GPT-3 style_esht",
"evaluation": {
"accuracy": 0.4795180722891566
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='GPT-3 style_esht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "es",
"template_name": "MNLI crowdsource_esht",
"evaluation": {
"accuracy": 0.3333333333333333
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='MNLI crowdsource_esht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "es",
"template_name": "can we infer_esht",
"evaluation": {
"accuracy": 0.3333333333333333
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='can we infer_esht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "es",
"template_name": "guaranteed/possible/impossible_esht",
"evaluation": {
"accuracy": 0.529718875502008
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='guaranteed/possible/impossible_esht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "es",
"template_name": "justified in saying_esht",
"evaluation": {
"accuracy": 0.3333333333333333
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='justified in saying_esht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "fr",
"template_name": "GPT-3 style_frht",
"evaluation": {
"accuracy": 0.45863453815261046
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='GPT-3 style_frht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "fr",
"template_name": "MNLI crowdsource_frht",
"evaluation": {
"accuracy": 0.42730923694779116
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='MNLI crowdsource_frht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "fr",
"template_name": "can we infer_frht",
"evaluation": {
"accuracy": 0.40963855421686746
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='can we infer_frht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "fr",
"template_name": "guaranteed/possible/impossible_frht",
"evaluation": {
"accuracy": 0.37309236947791163
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='guaranteed/possible/impossible_frht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "fr",
"template_name": "justified in saying_frht",
"evaluation": {
"accuracy": 0.4710843373493976
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='fr', template_name='justified in saying_frht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "hi",
"template_name": "GPT-3 style_hiht",
"evaluation": {
"accuracy": 0.3542168674698795
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='GPT-3 style_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "hi",
"template_name": "MNLI crowdsource_hiht",
"evaluation": {
"accuracy": 0.4389558232931727
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='MNLI crowdsource_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "hi",
"template_name": "can we infer_hiht",
"evaluation": {
"accuracy": 0.41566265060240964
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='can we infer_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "hi",
"template_name": "guaranteed/possible/impossible_hiht",
"evaluation": {
"accuracy": 0.4927710843373494
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='guaranteed/possible/impossible_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "hi",
"template_name": "justified in saying_hiht",
"evaluation": {
"accuracy": 0.4562248995983936
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='justified in saying_hiht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,50 @@
dataset,prompt,metric,value
xnli_ar,GPT-3 style_arht,accuracy,0.3634538152610442
xnli_ar,MNLI crowdsource_arht,accuracy,0.3437751004016064
xnli_ar,can we infer_arht,accuracy,0.40923694779116465
xnli_ar,guaranteed/possible/impossible_arht,accuracy,0.342570281124498
xnli_ar,justified in saying_arht,accuracy,0.4321285140562249
xnli_ar,median,accuracy,0.3634538152610442
xnli_es,GPT-3 style_esht,accuracy,0.4795180722891566
xnli_es,MNLI crowdsource_esht,accuracy,0.3333333333333333
xnli_es,can we infer_esht,accuracy,0.3333333333333333
xnli_es,guaranteed/possible/impossible_esht,accuracy,0.529718875502008
xnli_es,justified in saying_esht,accuracy,0.3333333333333333
xnli_es,median,accuracy,0.3333333333333333
xnli_fr,GPT-3 style_frht,accuracy,0.45863453815261046
xnli_fr,MNLI crowdsource_frht,accuracy,0.42730923694779116
xnli_fr,can we infer_frht,accuracy,0.40963855421686746
xnli_fr,guaranteed/possible/impossible_frht,accuracy,0.37309236947791163
xnli_fr,justified in saying_frht,accuracy,0.4710843373493976
xnli_fr,median,accuracy,0.42730923694779116
xnli_hi,GPT-3 style_hiht,accuracy,0.3542168674698795
xnli_hi,MNLI crowdsource_hiht,accuracy,0.4389558232931727
xnli_hi,can we infer_hiht,accuracy,0.41566265060240964
xnli_hi,guaranteed/possible/impossible_hiht,accuracy,0.4927710843373494
xnli_hi,justified in saying_hiht,accuracy,0.4562248995983936
xnli_hi,median,accuracy,0.4389558232931727
xnli_sw,GPT-3 style_swht,accuracy,0.3389558232931727
xnli_sw,MNLI crowdsource_swht,accuracy,0.3257028112449799
xnli_sw,can we infer_swht,accuracy,0.3429718875502008
xnli_sw,guaranteed/possible/impossible_swht,accuracy,0.3718875502008032
xnli_sw,justified in saying_swht,accuracy,0.3409638554216867
xnli_sw,median,accuracy,0.3409638554216867
xnli_ur,GPT-3 style_urht,accuracy,0.3646586345381526
xnli_ur,MNLI crowdsource_urht,accuracy,0.3538152610441767
xnli_ur,can we infer_urht,accuracy,0.3610441767068273
xnli_ur,guaranteed/possible/impossible_urht,accuracy,0.37670682730923694
xnli_ur,justified in saying_urht,accuracy,0.3377510040160643
xnli_ur,median,accuracy,0.3610441767068273
xnli_vi,GPT-3 style_viht,accuracy,0.3357429718875502
xnli_vi,MNLI crowdsource_viht,accuracy,0.3477911646586345
xnli_vi,can we infer_viht,accuracy,0.3333333333333333
xnli_vi,guaranteed/possible/impossible_viht,accuracy,0.39759036144578314
xnli_vi,justified in saying_viht,accuracy,0.3333333333333333
xnli_vi,median,accuracy,0.3357429718875502
xnli_zh,GPT-3 style_zhht,accuracy,0.348995983935743
xnli_zh,MNLI crowdsource_zhht,accuracy,0.45100401606425705
xnli_zh,can we infer_zhht,accuracy,0.3931726907630522
xnli_zh,guaranteed/possible/impossible_zhht,accuracy,0.39879518072289155
xnli_zh,justified in saying_zhht,accuracy,0.3827309236947791
xnli_zh,median,accuracy,0.3931726907630522
multiple,average,multiple,0.3742469879518072
1 dataset prompt metric value
2 xnli_ar GPT-3 style_arht accuracy 0.3634538152610442
3 xnli_ar MNLI crowdsource_arht accuracy 0.3437751004016064
4 xnli_ar can we infer_arht accuracy 0.40923694779116465
5 xnli_ar guaranteed/possible/impossible_arht accuracy 0.342570281124498
6 xnli_ar justified in saying_arht accuracy 0.4321285140562249
7 xnli_ar median accuracy 0.3634538152610442
8 xnli_es GPT-3 style_esht accuracy 0.4795180722891566
9 xnli_es MNLI crowdsource_esht accuracy 0.3333333333333333
10 xnli_es can we infer_esht accuracy 0.3333333333333333
11 xnli_es guaranteed/possible/impossible_esht accuracy 0.529718875502008
12 xnli_es justified in saying_esht accuracy 0.3333333333333333
13 xnli_es median accuracy 0.3333333333333333
14 xnli_fr GPT-3 style_frht accuracy 0.45863453815261046
15 xnli_fr MNLI crowdsource_frht accuracy 0.42730923694779116
16 xnli_fr can we infer_frht accuracy 0.40963855421686746
17 xnli_fr guaranteed/possible/impossible_frht accuracy 0.37309236947791163
18 xnli_fr justified in saying_frht accuracy 0.4710843373493976
19 xnli_fr median accuracy 0.42730923694779116
20 xnli_hi GPT-3 style_hiht accuracy 0.3542168674698795
21 xnli_hi MNLI crowdsource_hiht accuracy 0.4389558232931727
22 xnli_hi can we infer_hiht accuracy 0.41566265060240964
23 xnli_hi guaranteed/possible/impossible_hiht accuracy 0.4927710843373494
24 xnli_hi justified in saying_hiht accuracy 0.4562248995983936
25 xnli_hi median accuracy 0.4389558232931727
26 xnli_sw GPT-3 style_swht accuracy 0.3389558232931727
27 xnli_sw MNLI crowdsource_swht accuracy 0.3257028112449799
28 xnli_sw can we infer_swht accuracy 0.3429718875502008
29 xnli_sw guaranteed/possible/impossible_swht accuracy 0.3718875502008032
30 xnli_sw justified in saying_swht accuracy 0.3409638554216867
31 xnli_sw median accuracy 0.3409638554216867
32 xnli_ur GPT-3 style_urht accuracy 0.3646586345381526
33 xnli_ur MNLI crowdsource_urht accuracy 0.3538152610441767
34 xnli_ur can we infer_urht accuracy 0.3610441767068273
35 xnli_ur guaranteed/possible/impossible_urht accuracy 0.37670682730923694
36 xnli_ur justified in saying_urht accuracy 0.3377510040160643
37 xnli_ur median accuracy 0.3610441767068273
38 xnli_vi GPT-3 style_viht accuracy 0.3357429718875502
39 xnli_vi MNLI crowdsource_viht accuracy 0.3477911646586345
40 xnli_vi can we infer_viht accuracy 0.3333333333333333
41 xnli_vi guaranteed/possible/impossible_viht accuracy 0.39759036144578314
42 xnli_vi justified in saying_viht accuracy 0.3333333333333333
43 xnli_vi median accuracy 0.3357429718875502
44 xnli_zh GPT-3 style_zhht accuracy 0.348995983935743
45 xnli_zh MNLI crowdsource_zhht accuracy 0.45100401606425705
46 xnli_zh can we infer_zhht accuracy 0.3931726907630522
47 xnli_zh guaranteed/possible/impossible_zhht accuracy 0.39879518072289155
48 xnli_zh justified in saying_zhht accuracy 0.3827309236947791
49 xnli_zh median accuracy 0.3931726907630522
50 multiple average multiple 0.3742469879518072

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "sw",
"template_name": "GPT-3 style_swht",
"evaluation": {
"accuracy": 0.3389558232931727
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='GPT-3 style_swht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "sw",
"template_name": "MNLI crowdsource_swht",
"evaluation": {
"accuracy": 0.3257028112449799
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='MNLI crowdsource_swht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "sw",
"template_name": "can we infer_swht",
"evaluation": {
"accuracy": 0.3429718875502008
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='can we infer_swht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "sw",
"template_name": "guaranteed/possible/impossible_swht",
"evaluation": {
"accuracy": 0.3718875502008032
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='guaranteed/possible/impossible_swht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "sw",
"template_name": "justified in saying_swht",
"evaluation": {
"accuracy": 0.3409638554216867
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='justified in saying_swht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ur",
"template_name": "GPT-3 style_urht",
"evaluation": {
"accuracy": 0.3646586345381526
},
"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ur",
"template_name": "MNLI crowdsource_urht",
"evaluation": {
"accuracy": 0.3538152610441767
},
"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ur",
"template_name": "can we infer_urht",
"evaluation": {
"accuracy": 0.3610441767068273
},
"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ur",
"template_name": "guaranteed/possible/impossible_urht",
"evaluation": {
"accuracy": 0.37670682730923694
},
"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "ur",
"template_name": "justified in saying_urht",
"evaluation": {
"accuracy": 0.3377510040160643
},
"arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "vi",
"template_name": "GPT-3 style_viht",
"evaluation": {
"accuracy": 0.3357429718875502
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "vi",
"template_name": "MNLI crowdsource_viht",
"evaluation": {
"accuracy": 0.3477911646586345
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "vi",
"template_name": "can we infer_viht",
"evaluation": {
"accuracy": 0.3333333333333333
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='can we infer_viht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "vi",
"template_name": "guaranteed/possible/impossible_viht",
"evaluation": {
"accuracy": 0.39759036144578314
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='guaranteed/possible/impossible_viht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "vi",
"template_name": "justified in saying_viht",
"evaluation": {
"accuracy": 0.3333333333333333
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='justified in saying_viht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "zh",
"template_name": "GPT-3 style_zhht",
"evaluation": {
"accuracy": 0.348995983935743
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='GPT-3 style_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "zh",
"template_name": "MNLI crowdsource_zhht",
"evaluation": {
"accuracy": 0.45100401606425705
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='MNLI crowdsource_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "zh",
"template_name": "can we infer_zhht",
"evaluation": {
"accuracy": 0.3931726907630522
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='can we infer_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "zh",
"template_name": "guaranteed/possible/impossible_zhht",
"evaluation": {
"accuracy": 0.39879518072289155
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='guaranteed/possible/impossible_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xnli",
"dataset_config_name": "zh",
"template_name": "justified in saying_zhht",
"evaluation": {
"accuracy": 0.3827309236947791
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='justified in saying_zhht', tokenizer_name=None, use_slow_tokenizer=False)"
}