初始化项目,由ModelHub XC社区提供模型

Model: bigscience/bloomz-7b1-p3
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-15 07:40:14 +08:00
commit 78a6661ff1
634 changed files with 7477 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Answer Given options_armt",
"evaluation": {
"accuracy": 0.7061548643282595
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Answer Given options_armt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Choose Story Ending_armt",
"evaluation": {
"accuracy": 0.786896095301125
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Choose Story Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Generate Ending_armt",
"evaluation": {
"accuracy": 0.600926538716082
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Generate Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Novel Correct Ending_armt",
"evaluation": {
"accuracy": 0.7511581733951026
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Novel Correct Ending_armt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Story Continuation and Options_armt",
"evaluation": {
"accuracy": 0.757114493712773
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='Story Continuation and Options_armt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Answer Given options_esmt",
"evaluation": {
"accuracy": 0.7902051621442753
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Answer Given options_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Choose Story Ending_esmt",
"evaluation": {
"accuracy": 0.8160158835208471
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Choose Story Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Generate Ending_esmt",
"evaluation": {
"accuracy": 0.657180675049636
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Generate Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Novel Correct Ending_esmt",
"evaluation": {
"accuracy": 0.784910655195235
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Novel Correct Ending_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Story Continuation and Options_esmt",
"evaluation": {
"accuracy": 0.7696889477167439
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='es', template_name='Story Continuation and Options_esmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Answer Given options_eumt",
"evaluation": {
"accuracy": 0.6227663798808736
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Answer Given options_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Choose Story Ending_eumt",
"evaluation": {
"accuracy": 0.6763732627399074
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Choose Story Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Generate Ending_eumt",
"evaluation": {
"accuracy": 0.5737921906022502
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Generate Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Novel Correct Ending_eumt",
"evaluation": {
"accuracy": 0.686300463269358
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Novel Correct Ending_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Story Continuation and Options_eumt",
"evaluation": {
"accuracy": 0.6637988087359364
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='eu', template_name='Story Continuation and Options_eumt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Answer Given options_himt",
"evaluation": {
"accuracy": 0.6697551290536069
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Answer Given options_himt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Choose Story Ending_himt",
"evaluation": {
"accuracy": 0.7160820648577101
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Choose Story Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Generate Ending_himt",
"evaluation": {
"accuracy": 0.5923229649238915
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Generate Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Novel Correct Ending_himt",
"evaluation": {
"accuracy": 0.6882859033752482
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Novel Correct Ending_himt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Story Continuation and Options_himt",
"evaluation": {
"accuracy": 0.7048312375909993
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='hi', template_name='Story Continuation and Options_himt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Answer Given options_idmt",
"evaluation": {
"accuracy": 0.7346128391793514
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Answer Given options_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Choose Story Ending_idmt",
"evaluation": {
"accuracy": 0.7511581733951026
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Choose Story Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Generate Ending_idmt",
"evaluation": {
"accuracy": 0.6201191264063534
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Generate Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Novel Correct Ending_idmt",
"evaluation": {
"accuracy": 0.728656518861681
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Novel Correct Ending_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Story Continuation and Options_idmt",
"evaluation": {
"accuracy": 0.7412309728656519
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='Story Continuation and Options_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Answer Given options_zhmt",
"evaluation": {
"accuracy": 0.7425545996029119
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Answer Given options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Choose Story Ending_zhmt",
"evaluation": {
"accuracy": 0.7941760423560555
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Generate Ending_zhmt",
"evaluation": {
"accuracy": 0.6247518199867638
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Novel Correct Ending_zhmt",
"evaluation": {
"accuracy": 0.7842488418266049
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Story Continuation and Options_zhmt",
"evaluation": {
"accuracy": 0.8034414295168762
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Story Continuation and Options_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "Replace_frmt",
"evaluation": {
"accuracy": 0.5180722891566265
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "True or False_frmt",
"evaluation": {
"accuracy": 0.46987951807228917
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='True or False_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "does underscore refer to_frmt",
"evaluation": {
"accuracy": 0.5421686746987951
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='does underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "stand for_frmt",
"evaluation": {
"accuracy": 0.5060240963855421
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "underscore refer to_frmt",
"evaluation": {
"accuracy": 0.5421686746987951
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "Replace_ptmt",
"evaluation": {
"accuracy": 0.5057034220532319
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='Replace_ptmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "True or False_ptmt",
"evaluation": {
"accuracy": 0.5133079847908745
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='True or False_ptmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "does underscore refer to_ptmt",
"evaluation": {
"accuracy": 0.5209125475285171
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='does underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "stand for_ptmt",
"evaluation": {
"accuracy": 0.5209125475285171
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='stand for_ptmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "underscore refer to_ptmt",
"evaluation": {
"accuracy": 0.49049429657794674
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='pt', template_name='underscore refer to_ptmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "Replace_zhmt",
"evaluation": {
"accuracy": 0.5238095238095238
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='Replace_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "True or False_zhmt",
"evaluation": {
"accuracy": 0.5138888888888888
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='True or False_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "does underscore refer to_zhmt",
"evaluation": {
"accuracy": 0.49404761904761907
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='does underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "stand for_zhmt",
"evaluation": {
"accuracy": 0.49603174603174605
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='stand for_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "underscore refer to_zhmt",
"evaluation": {
"accuracy": 0.503968253968254
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='zh', template_name='underscore refer to_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,86 @@
dataset,prompt,metric,value
xcopa_id,C1 or C2? premise_idmt,accuracy,0.51
xcopa_id,best_option_idmt,accuracy,0.53
xcopa_id,cause_effect_idmt,accuracy,0.69
xcopa_id,i_am_hesitating_idmt,accuracy,0.64
xcopa_id,plausible_alternatives_idmt,accuracy,0.7
xcopa_id,median,accuracy,0.64
xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6
xcopa_sw,best_option_swmt,accuracy,0.62
xcopa_sw,cause_effect_swmt,accuracy,0.49
xcopa_sw,i_am_hesitating_swmt,accuracy,0.56
xcopa_sw,plausible_alternatives_swmt,accuracy,0.54
xcopa_sw,median,accuracy,0.56
xcopa_ta,C1 or C2? premise_tamt,accuracy,0.52
xcopa_ta,best_option_tamt,accuracy,0.55
xcopa_ta,cause_effect_tamt,accuracy,0.63
xcopa_ta,i_am_hesitating_tamt,accuracy,0.63
xcopa_ta,plausible_alternatives_tamt,accuracy,0.66
xcopa_ta,median,accuracy,0.63
xcopa_vi,C1 or C2? premise_vimt,accuracy,0.55
xcopa_vi,best_option_vimt,accuracy,0.61
xcopa_vi,cause_effect_vimt,accuracy,0.64
xcopa_vi,i_am_hesitating_vimt,accuracy,0.6
xcopa_vi,plausible_alternatives_vimt,accuracy,0.64
xcopa_vi,median,accuracy,0.61
xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.52
xcopa_zh,best_option_zhmt,accuracy,0.61
xcopa_zh,cause_effect_zhmt,accuracy,0.75
xcopa_zh,i_am_hesitating_zhmt,accuracy,0.72
xcopa_zh,plausible_alternatives_zhmt,accuracy,0.76
xcopa_zh,median,accuracy,0.72
xstory_cloze_ar,Answer Given options_armt,accuracy,0.7061548643282595
xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.786896095301125
xstory_cloze_ar,Generate Ending_armt,accuracy,0.600926538716082
xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.7511581733951026
xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.757114493712773
xstory_cloze_ar,median,accuracy,0.7511581733951026
xstory_cloze_es,Answer Given options_esmt,accuracy,0.7902051621442753
xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.8160158835208471
xstory_cloze_es,Generate Ending_esmt,accuracy,0.657180675049636
xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.784910655195235
xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.7696889477167439
xstory_cloze_es,median,accuracy,0.784910655195235
xstory_cloze_eu,Answer Given options_eumt,accuracy,0.6227663798808736
xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.6763732627399074
xstory_cloze_eu,Generate Ending_eumt,accuracy,0.5737921906022502
xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.686300463269358
xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.6637988087359364
xstory_cloze_eu,median,accuracy,0.6637988087359364
xstory_cloze_hi,Answer Given options_himt,accuracy,0.6697551290536069
xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.7160820648577101
xstory_cloze_hi,Generate Ending_himt,accuracy,0.5923229649238915
xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.6882859033752482
xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.7048312375909993
xstory_cloze_hi,median,accuracy,0.6882859033752482
xstory_cloze_id,Answer Given options_idmt,accuracy,0.7346128391793514
xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.7511581733951026
xstory_cloze_id,Generate Ending_idmt,accuracy,0.6201191264063534
xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.728656518861681
xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.7412309728656519
xstory_cloze_id,median,accuracy,0.7346128391793514
xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.7425545996029119
xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.7941760423560555
xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.6247518199867638
xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.7842488418266049
xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.8034414295168762
xstory_cloze_zh,median,accuracy,0.7842488418266049
xwinograd_fr,Replace_frmt,accuracy,0.5180722891566265
xwinograd_fr,True or False_frmt,accuracy,0.46987951807228917
xwinograd_fr,does underscore refer to_frmt,accuracy,0.5421686746987951
xwinograd_fr,stand for_frmt,accuracy,0.5060240963855421
xwinograd_fr,underscore refer to_frmt,accuracy,0.5421686746987951
xwinograd_fr,median,accuracy,0.5180722891566265
xwinograd_pt,Replace_ptmt,accuracy,0.5057034220532319
xwinograd_pt,True or False_ptmt,accuracy,0.5133079847908745
xwinograd_pt,does underscore refer to_ptmt,accuracy,0.5209125475285171
xwinograd_pt,stand for_ptmt,accuracy,0.5209125475285171
xwinograd_pt,underscore refer to_ptmt,accuracy,0.49049429657794674
xwinograd_pt,median,accuracy,0.5133079847908745
xwinograd_zh,Replace_zhmt,accuracy,0.5238095238095238
xwinograd_zh,True or False_zhmt,accuracy,0.5138888888888888
xwinograd_zh,does underscore refer to_zhmt,accuracy,0.49404761904761907
xwinograd_zh,stand for_zhmt,accuracy,0.49603174603174605
xwinograd_zh,underscore refer to_zhmt,accuracy,0.503968253968254
xwinograd_zh,median,accuracy,0.503968253968254
multiple,average,multiple,0.6501688392588024
1 dataset prompt metric value
2 xcopa_id C1 or C2? premise_idmt accuracy 0.51
3 xcopa_id best_option_idmt accuracy 0.53
4 xcopa_id cause_effect_idmt accuracy 0.69
5 xcopa_id i_am_hesitating_idmt accuracy 0.64
6 xcopa_id plausible_alternatives_idmt accuracy 0.7
7 xcopa_id median accuracy 0.64
8 xcopa_sw C1 or C2? premise_swmt accuracy 0.6
9 xcopa_sw best_option_swmt accuracy 0.62
10 xcopa_sw cause_effect_swmt accuracy 0.49
11 xcopa_sw i_am_hesitating_swmt accuracy 0.56
12 xcopa_sw plausible_alternatives_swmt accuracy 0.54
13 xcopa_sw median accuracy 0.56
14 xcopa_ta C1 or C2? premise_tamt accuracy 0.52
15 xcopa_ta best_option_tamt accuracy 0.55
16 xcopa_ta cause_effect_tamt accuracy 0.63
17 xcopa_ta i_am_hesitating_tamt accuracy 0.63
18 xcopa_ta plausible_alternatives_tamt accuracy 0.66
19 xcopa_ta median accuracy 0.63
20 xcopa_vi C1 or C2? premise_vimt accuracy 0.55
21 xcopa_vi best_option_vimt accuracy 0.61
22 xcopa_vi cause_effect_vimt accuracy 0.64
23 xcopa_vi i_am_hesitating_vimt accuracy 0.6
24 xcopa_vi plausible_alternatives_vimt accuracy 0.64
25 xcopa_vi median accuracy 0.61
26 xcopa_zh C1 or C2? premise_zhmt accuracy 0.52
27 xcopa_zh best_option_zhmt accuracy 0.61
28 xcopa_zh cause_effect_zhmt accuracy 0.75
29 xcopa_zh i_am_hesitating_zhmt accuracy 0.72
30 xcopa_zh plausible_alternatives_zhmt accuracy 0.76
31 xcopa_zh median accuracy 0.72
32 xstory_cloze_ar Answer Given options_armt accuracy 0.7061548643282595
33 xstory_cloze_ar Choose Story Ending_armt accuracy 0.786896095301125
34 xstory_cloze_ar Generate Ending_armt accuracy 0.600926538716082
35 xstory_cloze_ar Novel Correct Ending_armt accuracy 0.7511581733951026
36 xstory_cloze_ar Story Continuation and Options_armt accuracy 0.757114493712773
37 xstory_cloze_ar median accuracy 0.7511581733951026
38 xstory_cloze_es Answer Given options_esmt accuracy 0.7902051621442753
39 xstory_cloze_es Choose Story Ending_esmt accuracy 0.8160158835208471
40 xstory_cloze_es Generate Ending_esmt accuracy 0.657180675049636
41 xstory_cloze_es Novel Correct Ending_esmt accuracy 0.784910655195235
42 xstory_cloze_es Story Continuation and Options_esmt accuracy 0.7696889477167439
43 xstory_cloze_es median accuracy 0.784910655195235
44 xstory_cloze_eu Answer Given options_eumt accuracy 0.6227663798808736
45 xstory_cloze_eu Choose Story Ending_eumt accuracy 0.6763732627399074
46 xstory_cloze_eu Generate Ending_eumt accuracy 0.5737921906022502
47 xstory_cloze_eu Novel Correct Ending_eumt accuracy 0.686300463269358
48 xstory_cloze_eu Story Continuation and Options_eumt accuracy 0.6637988087359364
49 xstory_cloze_eu median accuracy 0.6637988087359364
50 xstory_cloze_hi Answer Given options_himt accuracy 0.6697551290536069
51 xstory_cloze_hi Choose Story Ending_himt accuracy 0.7160820648577101
52 xstory_cloze_hi Generate Ending_himt accuracy 0.5923229649238915
53 xstory_cloze_hi Novel Correct Ending_himt accuracy 0.6882859033752482
54 xstory_cloze_hi Story Continuation and Options_himt accuracy 0.7048312375909993
55 xstory_cloze_hi median accuracy 0.6882859033752482
56 xstory_cloze_id Answer Given options_idmt accuracy 0.7346128391793514
57 xstory_cloze_id Choose Story Ending_idmt accuracy 0.7511581733951026
58 xstory_cloze_id Generate Ending_idmt accuracy 0.6201191264063534
59 xstory_cloze_id Novel Correct Ending_idmt accuracy 0.728656518861681
60 xstory_cloze_id Story Continuation and Options_idmt accuracy 0.7412309728656519
61 xstory_cloze_id median accuracy 0.7346128391793514
62 xstory_cloze_zh Answer Given options_zhmt accuracy 0.7425545996029119
63 xstory_cloze_zh Choose Story Ending_zhmt accuracy 0.7941760423560555
64 xstory_cloze_zh Generate Ending_zhmt accuracy 0.6247518199867638
65 xstory_cloze_zh Novel Correct Ending_zhmt accuracy 0.7842488418266049
66 xstory_cloze_zh Story Continuation and Options_zhmt accuracy 0.8034414295168762
67 xstory_cloze_zh median accuracy 0.7842488418266049
68 xwinograd_fr Replace_frmt accuracy 0.5180722891566265
69 xwinograd_fr True or False_frmt accuracy 0.46987951807228917
70 xwinograd_fr does underscore refer to_frmt accuracy 0.5421686746987951
71 xwinograd_fr stand for_frmt accuracy 0.5060240963855421
72 xwinograd_fr underscore refer to_frmt accuracy 0.5421686746987951
73 xwinograd_fr median accuracy 0.5180722891566265
74 xwinograd_pt Replace_ptmt accuracy 0.5057034220532319
75 xwinograd_pt True or False_ptmt accuracy 0.5133079847908745
76 xwinograd_pt does underscore refer to_ptmt accuracy 0.5209125475285171
77 xwinograd_pt stand for_ptmt accuracy 0.5209125475285171
78 xwinograd_pt underscore refer to_ptmt accuracy 0.49049429657794674
79 xwinograd_pt median accuracy 0.5133079847908745
80 xwinograd_zh Replace_zhmt accuracy 0.5238095238095238
81 xwinograd_zh True or False_zhmt accuracy 0.5138888888888888
82 xwinograd_zh does underscore refer to_zhmt accuracy 0.49404761904761907
83 xwinograd_zh stand for_zhmt accuracy 0.49603174603174605
84 xwinograd_zh underscore refer to_zhmt accuracy 0.503968253968254
85 xwinograd_zh median accuracy 0.503968253968254
86 multiple average multiple 0.6501688392588024

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "C1 or C2? premise_idmt",
"evaluation": {
"accuracy": 0.51
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='C1 or C2? premise_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "best_option_idmt",
"evaluation": {
"accuracy": 0.53
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='best_option_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "cause_effect_idmt",
"evaluation": {
"accuracy": 0.69
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='cause_effect_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "i_am_hesitating_idmt",
"evaluation": {
"accuracy": 0.64
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='i_am_hesitating_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "plausible_alternatives_idmt",
"evaluation": {
"accuracy": 0.7
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='id', template_name='plausible_alternatives_idmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "sw",
"template_name": "C1 or C2? premise_swmt",
"evaluation": {
"accuracy": 0.6
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='C1 or C2? premise_swmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "sw",
"template_name": "best_option_swmt",
"evaluation": {
"accuracy": 0.62
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='best_option_swmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "sw",
"template_name": "cause_effect_swmt",
"evaluation": {
"accuracy": 0.49
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='cause_effect_swmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "sw",
"template_name": "i_am_hesitating_swmt",
"evaluation": {
"accuracy": 0.56
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='i_am_hesitating_swmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "sw",
"template_name": "plausible_alternatives_swmt",
"evaluation": {
"accuracy": 0.54
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='sw', template_name='plausible_alternatives_swmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "ta",
"template_name": "C1 or C2? premise_tamt",
"evaluation": {
"accuracy": 0.52
},
"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='C1 or C2? premise_tamt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "ta",
"template_name": "best_option_tamt",
"evaluation": {
"accuracy": 0.55
},
"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='best_option_tamt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "ta",
"template_name": "cause_effect_tamt",
"evaluation": {
"accuracy": 0.63
},
"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='cause_effect_tamt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "ta",
"template_name": "i_am_hesitating_tamt",
"evaluation": {
"accuracy": 0.63
},
"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='i_am_hesitating_tamt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "ta",
"template_name": "plausible_alternatives_tamt",
"evaluation": {
"accuracy": 0.66
},
"arguments": "Namespace(config_name=None, dataset_config_name='ta', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ta', template_name='plausible_alternatives_tamt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "vi",
"template_name": "C1 or C2? premise_vimt",
"evaluation": {
"accuracy": 0.55
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='C1 or C2? premise_vimt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "vi",
"template_name": "best_option_vimt",
"evaluation": {
"accuracy": 0.61
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='best_option_vimt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "vi",
"template_name": "cause_effect_vimt",
"evaluation": {
"accuracy": 0.64
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='cause_effect_vimt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "vi",
"template_name": "i_am_hesitating_vimt",
"evaluation": {
"accuracy": 0.6
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='i_am_hesitating_vimt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "vi",
"template_name": "plausible_alternatives_vimt",
"evaluation": {
"accuracy": 0.64
},
"arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='plausible_alternatives_vimt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "zh",
"template_name": "C1 or C2? premise_zhmt",
"evaluation": {
"accuracy": 0.52
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='C1 or C2? premise_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "zh",
"template_name": "best_option_zhmt",
"evaluation": {
"accuracy": 0.61
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='best_option_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "zh",
"template_name": "cause_effect_zhmt",
"evaluation": {
"accuracy": 0.75
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='cause_effect_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "zh",
"template_name": "i_am_hesitating_zhmt",
"evaluation": {
"accuracy": 0.72
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='i_am_hesitating_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "zh",
"template_name": "plausible_alternatives_zhmt",
"evaluation": {
"accuracy": 0.76
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='plausible_alternatives_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
}