初始化项目,由ModelHub XC社区提供模型

Model: bigscience/bloomz-7b1-p3
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-15 07:40:14 +08:00
commit 78a6661ff1
634 changed files with 7477 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7518199867637326
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.7749834546657842
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.586366644606221
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7518199867637326
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7438782263401721
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7835870284579749
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.8292521508934481
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.6399735274652548
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7935142289874255
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7888815354070152
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7041694242223693
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.6823295830575777
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.5625413633355394
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.6671078755790867
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.671740569159497
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.6915949702183984
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.7220383851753805
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.5883520847121112
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.6743878226340172
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.6816677696889477
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7445400397088021
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.771012574454004
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.6029119788219722
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7485109199205824
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7438782263401721
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7610853739245532
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.7961614824619457
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.6214427531436135
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7696889477167439
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7670416942422237
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5225806451612903
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.48946236559139783
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5281720430107527
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.5062365591397849
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5372043010752688
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5060240963855421
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.5421686746987951
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5542168674698795
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.4819277108433735
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5301204819277109
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5133079847908745
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.4714828897338403
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5209125475285171
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.5019011406844106
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5399239543726235
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5257936507936508
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.5297619047619048
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5218253968253969
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.4444444444444444
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5198412698412699
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.351
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.334
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.351
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.288
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.345
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.339
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.335
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.354
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.297
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.345
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.37583333333333335
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.3408333333333333
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.36333333333333334
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.31083333333333335
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.34
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,194 @@
dataset,prompt,metric,value
anli_dev_r1,GPT-3 style,accuracy,0.351
anli_dev_r1,MNLI crowdsource,accuracy,0.334
anli_dev_r1,can we infer,accuracy,0.351
anli_dev_r1,guaranteed/possible/impossible,accuracy,0.288
anli_dev_r1,justified in saying,accuracy,0.345
anli_dev_r1,median,accuracy,0.345
anli_dev_r2,GPT-3 style,accuracy,0.339
anli_dev_r2,MNLI crowdsource,accuracy,0.335
anli_dev_r2,can we infer,accuracy,0.354
anli_dev_r2,guaranteed/possible/impossible,accuracy,0.297
anli_dev_r2,justified in saying,accuracy,0.345
anli_dev_r2,median,accuracy,0.339
anli_dev_r3,GPT-3 style,accuracy,0.37583333333333335
anli_dev_r3,MNLI crowdsource,accuracy,0.3408333333333333
anli_dev_r3,can we infer,accuracy,0.36333333333333334
anli_dev_r3,guaranteed/possible/impossible,accuracy,0.31083333333333335
anli_dev_r3,justified in saying,accuracy,0.34
anli_dev_r3,median,accuracy,0.3408333333333333
story_cloze_2016,Answer Given options,accuracy,0.8305718866916088
story_cloze_2016,Choose Story Ending,accuracy,0.8706574024585783
story_cloze_2016,Generate Ending,accuracy,0.7183324425440941
story_cloze_2016,Novel Correct Ending,accuracy,0.848743987172635
story_cloze_2016,Story Continuation and Options,accuracy,0.8466060929983966
story_cloze_2016,median,accuracy,0.8466060929983966
super_glue_cb,GPT-3 style,accuracy,0.625
super_glue_cb,MNLI crowdsource,accuracy,0.08928571428571429
super_glue_cb,can we infer,accuracy,0.5892857142857143
super_glue_cb,guaranteed/possible/impossible,accuracy,0.5
super_glue_cb,justified in saying,accuracy,0.5357142857142857
super_glue_cb,median,accuracy,0.5357142857142857
super_glue_copa,"C1 or C2? premise, so/because…",accuracy,0.66
super_glue_copa,best_option,accuracy,0.67
super_glue_copa,cause_effect,accuracy,0.78
super_glue_copa,i_am_hesitating,accuracy,0.8
super_glue_copa,plausible_alternatives,accuracy,0.81
super_glue_copa,median,accuracy,0.78
super_glue_rte,GPT-3 style,accuracy,0.7870036101083032
super_glue_rte,MNLI crowdsource,accuracy,0.7220216606498195
super_glue_rte,does it follow that,accuracy,0.6678700361010831
super_glue_rte,guaranteed true,accuracy,0.6714801444043321
super_glue_rte,should assume,accuracy,0.6678700361010831
super_glue_rte,median,accuracy,0.6714801444043321
winogrande_winogrande_xl,Replace,accuracy,0.5406471981057617
winogrande_winogrande_xl,True or False,accuracy,0.5074980268350434
winogrande_winogrande_xl,does underscore refer to,accuracy,0.5177584846093133
winogrande_winogrande_xl,stand for,accuracy,0.510655090765588
winogrande_winogrande_xl,underscore refer to,accuracy,0.5256511444356748
winogrande_winogrande_xl,median,accuracy,0.5177584846093133
xcopa_id,"C1 or C2? premise, so/because…",accuracy,0.47
xcopa_id,best_option,accuracy,0.51
xcopa_id,cause_effect,accuracy,0.65
xcopa_id,i_am_hesitating,accuracy,0.66
xcopa_id,plausible_alternatives,accuracy,0.67
xcopa_id,median,accuracy,0.65
xcopa_sw,"C1 or C2? premise, so/because…",accuracy,0.58
xcopa_sw,best_option,accuracy,0.57
xcopa_sw,cause_effect,accuracy,0.46
xcopa_sw,i_am_hesitating,accuracy,0.48
xcopa_sw,plausible_alternatives,accuracy,0.45
xcopa_sw,median,accuracy,0.48
xcopa_ta,"C1 or C2? premise, so/because…",accuracy,0.57
xcopa_ta,best_option,accuracy,0.67
xcopa_ta,cause_effect,accuracy,0.71
xcopa_ta,i_am_hesitating,accuracy,0.71
xcopa_ta,plausible_alternatives,accuracy,0.69
xcopa_ta,median,accuracy,0.69
xcopa_vi,"C1 or C2? premise, so/because…",accuracy,0.55
xcopa_vi,best_option,accuracy,0.61
xcopa_vi,cause_effect,accuracy,0.67
xcopa_vi,i_am_hesitating,accuracy,0.66
xcopa_vi,plausible_alternatives,accuracy,0.65
xcopa_vi,median,accuracy,0.65
xcopa_zh,"C1 or C2? premise, so/because…",accuracy,0.62
xcopa_zh,best_option,accuracy,0.61
xcopa_zh,cause_effect,accuracy,0.77
xcopa_zh,i_am_hesitating,accuracy,0.72
xcopa_zh,plausible_alternatives,accuracy,0.74
xcopa_zh,median,accuracy,0.72
xnli_ar,GPT-3 style,accuracy,0.5040160642570282
xnli_ar,MNLI crowdsource,accuracy,0.39879518072289155
xnli_ar,can we infer,accuracy,0.506425702811245
xnli_ar,guaranteed/possible/impossible,accuracy,0.4799196787148594
xnli_ar,justified in saying,accuracy,0.41526104417670684
xnli_ar,median,accuracy,0.4799196787148594
xnli_en,GPT-3 style,accuracy,0.5590361445783133
xnli_en,MNLI crowdsource,accuracy,0.342570281124498
xnli_en,can we infer,accuracy,0.5449799196787148
xnli_en,guaranteed/possible/impossible,accuracy,0.41164658634538154
xnli_en,justified in saying,accuracy,0.4634538152610442
xnli_en,median,accuracy,0.4634538152610442
xnli_es,GPT-3 style,accuracy,0.5373493975903615
xnli_es,MNLI crowdsource,accuracy,0.40441767068273093
xnli_es,can we infer,accuracy,0.5277108433734939
xnli_es,guaranteed/possible/impossible,accuracy,0.44216867469879517
xnli_es,justified in saying,accuracy,0.4534136546184739
xnli_es,median,accuracy,0.4534136546184739
xnli_fr,GPT-3 style,accuracy,0.5248995983935743
xnli_fr,MNLI crowdsource,accuracy,0.3895582329317269
xnli_fr,can we infer,accuracy,0.5337349397590362
xnli_fr,guaranteed/possible/impossible,accuracy,0.42971887550200805
xnli_fr,justified in saying,accuracy,0.4738955823293173
xnli_fr,median,accuracy,0.4738955823293173
xnli_hi,GPT-3 style,accuracy,0.4983935742971888
xnli_hi,MNLI crowdsource,accuracy,0.38714859437751004
xnli_hi,can we infer,accuracy,0.45542168674698796
xnli_hi,guaranteed/possible/impossible,accuracy,0.41405622489959837
xnli_hi,justified in saying,accuracy,0.38795180722891565
xnli_hi,median,accuracy,0.41405622489959837
xnli_sw,GPT-3 style,accuracy,0.43493975903614457
xnli_sw,MNLI crowdsource,accuracy,0.363855421686747
xnli_sw,can we infer,accuracy,0.42891566265060244
xnli_sw,guaranteed/possible/impossible,accuracy,0.3457831325301205
xnli_sw,justified in saying,accuracy,0.3650602409638554
xnli_sw,median,accuracy,0.3650602409638554
xnli_ur,GPT-3 style,accuracy,0.43493975903614457
xnli_ur,MNLI crowdsource,accuracy,0.3895582329317269
xnli_ur,can we infer,accuracy,0.45180722891566266
xnli_ur,guaranteed/possible/impossible,accuracy,0.40120481927710844
xnli_ur,justified in saying,accuracy,0.37630522088353413
xnli_ur,median,accuracy,0.40120481927710844
xnli_vi,GPT-3 style,accuracy,0.5196787148594377
xnli_vi,MNLI crowdsource,accuracy,0.38112449799196785
xnli_vi,can we infer,accuracy,0.5080321285140562
xnli_vi,guaranteed/possible/impossible,accuracy,0.38393574297188754
xnli_vi,justified in saying,accuracy,0.43614457831325304
xnli_vi,median,accuracy,0.43614457831325304
xnli_zh,GPT-3 style,accuracy,0.5052208835341365
xnli_zh,MNLI crowdsource,accuracy,0.4
xnli_zh,can we infer,accuracy,0.5228915662650603
xnli_zh,guaranteed/possible/impossible,accuracy,0.4738955823293173
xnli_zh,justified in saying,accuracy,0.45863453815261046
xnli_zh,median,accuracy,0.4738955823293173
xstory_cloze_ar,Answer Given options,accuracy,0.7518199867637326
xstory_cloze_ar,Choose Story Ending,accuracy,0.7749834546657842
xstory_cloze_ar,Generate Ending,accuracy,0.586366644606221
xstory_cloze_ar,Novel Correct Ending,accuracy,0.7518199867637326
xstory_cloze_ar,Story Continuation and Options,accuracy,0.7438782263401721
xstory_cloze_ar,median,accuracy,0.7518199867637326
xstory_cloze_es,Answer Given options,accuracy,0.7835870284579749
xstory_cloze_es,Choose Story Ending,accuracy,0.8292521508934481
xstory_cloze_es,Generate Ending,accuracy,0.6399735274652548
xstory_cloze_es,Novel Correct Ending,accuracy,0.7935142289874255
xstory_cloze_es,Story Continuation and Options,accuracy,0.7888815354070152
xstory_cloze_es,median,accuracy,0.7888815354070152
xstory_cloze_eu,Answer Given options,accuracy,0.7041694242223693
xstory_cloze_eu,Choose Story Ending,accuracy,0.6823295830575777
xstory_cloze_eu,Generate Ending,accuracy,0.5625413633355394
xstory_cloze_eu,Novel Correct Ending,accuracy,0.6671078755790867
xstory_cloze_eu,Story Continuation and Options,accuracy,0.671740569159497
xstory_cloze_eu,median,accuracy,0.671740569159497
xstory_cloze_hi,Answer Given options,accuracy,0.6915949702183984
xstory_cloze_hi,Choose Story Ending,accuracy,0.7220383851753805
xstory_cloze_hi,Generate Ending,accuracy,0.5883520847121112
xstory_cloze_hi,Novel Correct Ending,accuracy,0.6743878226340172
xstory_cloze_hi,Story Continuation and Options,accuracy,0.6816677696889477
xstory_cloze_hi,median,accuracy,0.6816677696889477
xstory_cloze_id,Answer Given options,accuracy,0.7445400397088021
xstory_cloze_id,Choose Story Ending,accuracy,0.771012574454004
xstory_cloze_id,Generate Ending,accuracy,0.6029119788219722
xstory_cloze_id,Novel Correct Ending,accuracy,0.7485109199205824
xstory_cloze_id,Story Continuation and Options,accuracy,0.7438782263401721
xstory_cloze_id,median,accuracy,0.7445400397088021
xstory_cloze_zh,Answer Given options,accuracy,0.7610853739245532
xstory_cloze_zh,Choose Story Ending,accuracy,0.7961614824619457
xstory_cloze_zh,Generate Ending,accuracy,0.6214427531436135
xstory_cloze_zh,Novel Correct Ending,accuracy,0.7696889477167439
xstory_cloze_zh,Story Continuation and Options,accuracy,0.7670416942422237
xstory_cloze_zh,median,accuracy,0.7670416942422237
xwinograd_en,Replace,accuracy,0.5225806451612903
xwinograd_en,True or False,accuracy,0.48946236559139783
xwinograd_en,does underscore refer to,accuracy,0.5281720430107527
xwinograd_en,stand for,accuracy,0.5062365591397849
xwinograd_en,underscore refer to,accuracy,0.5372043010752688
xwinograd_en,median,accuracy,0.5225806451612903
xwinograd_fr,Replace,accuracy,0.5060240963855421
xwinograd_fr,True or False,accuracy,0.5421686746987951
xwinograd_fr,does underscore refer to,accuracy,0.5542168674698795
xwinograd_fr,stand for,accuracy,0.4819277108433735
xwinograd_fr,underscore refer to,accuracy,0.5301204819277109
xwinograd_fr,median,accuracy,0.5301204819277109
xwinograd_pt,Replace,accuracy,0.5133079847908745
xwinograd_pt,True or False,accuracy,0.4714828897338403
xwinograd_pt,does underscore refer to,accuracy,0.5209125475285171
xwinograd_pt,stand for,accuracy,0.5019011406844106
xwinograd_pt,underscore refer to,accuracy,0.5399239543726235
xwinograd_pt,median,accuracy,0.5133079847908745
xwinograd_zh,Replace,accuracy,0.5257936507936508
xwinograd_zh,True or False,accuracy,0.5297619047619048
xwinograd_zh,does underscore refer to,accuracy,0.5218253968253969
xwinograd_zh,stand for,accuracy,0.4444444444444444
xwinograd_zh,underscore refer to,accuracy,0.5198412698412699
xwinograd_zh,median,accuracy,0.5218253968253969
multiple,average,multiple,0.5631550819200618
1 dataset prompt metric value
2 anli_dev_r1 GPT-3 style accuracy 0.351
3 anli_dev_r1 MNLI crowdsource accuracy 0.334
4 anli_dev_r1 can we infer accuracy 0.351
5 anli_dev_r1 guaranteed/possible/impossible accuracy 0.288
6 anli_dev_r1 justified in saying accuracy 0.345
7 anli_dev_r1 median accuracy 0.345
8 anli_dev_r2 GPT-3 style accuracy 0.339
9 anli_dev_r2 MNLI crowdsource accuracy 0.335
10 anli_dev_r2 can we infer accuracy 0.354
11 anli_dev_r2 guaranteed/possible/impossible accuracy 0.297
12 anli_dev_r2 justified in saying accuracy 0.345
13 anli_dev_r2 median accuracy 0.339
14 anli_dev_r3 GPT-3 style accuracy 0.37583333333333335
15 anli_dev_r3 MNLI crowdsource accuracy 0.3408333333333333
16 anli_dev_r3 can we infer accuracy 0.36333333333333334
17 anli_dev_r3 guaranteed/possible/impossible accuracy 0.31083333333333335
18 anli_dev_r3 justified in saying accuracy 0.34
19 anli_dev_r3 median accuracy 0.3408333333333333
20 story_cloze_2016 Answer Given options accuracy 0.8305718866916088
21 story_cloze_2016 Choose Story Ending accuracy 0.8706574024585783
22 story_cloze_2016 Generate Ending accuracy 0.7183324425440941
23 story_cloze_2016 Novel Correct Ending accuracy 0.848743987172635
24 story_cloze_2016 Story Continuation and Options accuracy 0.8466060929983966
25 story_cloze_2016 median accuracy 0.8466060929983966
26 super_glue_cb GPT-3 style accuracy 0.625
27 super_glue_cb MNLI crowdsource accuracy 0.08928571428571429
28 super_glue_cb can we infer accuracy 0.5892857142857143
29 super_glue_cb guaranteed/possible/impossible accuracy 0.5
30 super_glue_cb justified in saying accuracy 0.5357142857142857
31 super_glue_cb median accuracy 0.5357142857142857
32 super_glue_copa C1 or C2? premise, so/because… accuracy 0.66
33 super_glue_copa best_option accuracy 0.67
34 super_glue_copa cause_effect accuracy 0.78
35 super_glue_copa i_am_hesitating accuracy 0.8
36 super_glue_copa plausible_alternatives accuracy 0.81
37 super_glue_copa median accuracy 0.78
38 super_glue_rte GPT-3 style accuracy 0.7870036101083032
39 super_glue_rte MNLI crowdsource accuracy 0.7220216606498195
40 super_glue_rte does it follow that accuracy 0.6678700361010831
41 super_glue_rte guaranteed true accuracy 0.6714801444043321
42 super_glue_rte should assume accuracy 0.6678700361010831
43 super_glue_rte median accuracy 0.6714801444043321
44 winogrande_winogrande_xl Replace accuracy 0.5406471981057617
45 winogrande_winogrande_xl True or False accuracy 0.5074980268350434
46 winogrande_winogrande_xl does underscore refer to accuracy 0.5177584846093133
47 winogrande_winogrande_xl stand for accuracy 0.510655090765588
48 winogrande_winogrande_xl underscore refer to accuracy 0.5256511444356748
49 winogrande_winogrande_xl median accuracy 0.5177584846093133
50 xcopa_id C1 or C2? premise, so/because… accuracy 0.47
51 xcopa_id best_option accuracy 0.51
52 xcopa_id cause_effect accuracy 0.65
53 xcopa_id i_am_hesitating accuracy 0.66
54 xcopa_id plausible_alternatives accuracy 0.67
55 xcopa_id median accuracy 0.65
56 xcopa_sw C1 or C2? premise, so/because… accuracy 0.58
57 xcopa_sw best_option accuracy 0.57
58 xcopa_sw cause_effect accuracy 0.46
59 xcopa_sw i_am_hesitating accuracy 0.48
60 xcopa_sw plausible_alternatives accuracy 0.45
61 xcopa_sw median accuracy 0.48
62 xcopa_ta C1 or C2? premise, so/because… accuracy 0.57
63 xcopa_ta best_option accuracy 0.67
64 xcopa_ta cause_effect accuracy 0.71
65 xcopa_ta i_am_hesitating accuracy 0.71
66 xcopa_ta plausible_alternatives accuracy 0.69
67 xcopa_ta median accuracy 0.69
68 xcopa_vi C1 or C2? premise, so/because… accuracy 0.55
69 xcopa_vi best_option accuracy 0.61
70 xcopa_vi cause_effect accuracy 0.67
71 xcopa_vi i_am_hesitating accuracy 0.66
72 xcopa_vi plausible_alternatives accuracy 0.65
73 xcopa_vi median accuracy 0.65
74 xcopa_zh C1 or C2? premise, so/because… accuracy 0.62
75 xcopa_zh best_option accuracy 0.61
76 xcopa_zh cause_effect accuracy 0.77
77 xcopa_zh i_am_hesitating accuracy 0.72
78 xcopa_zh plausible_alternatives accuracy 0.74
79 xcopa_zh median accuracy 0.72
80 xnli_ar GPT-3 style accuracy 0.5040160642570282
81 xnli_ar MNLI crowdsource accuracy 0.39879518072289155
82 xnli_ar can we infer accuracy 0.506425702811245
83 xnli_ar guaranteed/possible/impossible accuracy 0.4799196787148594
84 xnli_ar justified in saying accuracy 0.41526104417670684
85 xnli_ar median accuracy 0.4799196787148594
86 xnli_en GPT-3 style accuracy 0.5590361445783133
87 xnli_en MNLI crowdsource accuracy 0.342570281124498
88 xnli_en can we infer accuracy 0.5449799196787148
89 xnli_en guaranteed/possible/impossible accuracy 0.41164658634538154
90 xnli_en justified in saying accuracy 0.4634538152610442
91 xnli_en median accuracy 0.4634538152610442
92 xnli_es GPT-3 style accuracy 0.5373493975903615
93 xnli_es MNLI crowdsource accuracy 0.40441767068273093
94 xnli_es can we infer accuracy 0.5277108433734939
95 xnli_es guaranteed/possible/impossible accuracy 0.44216867469879517
96 xnli_es justified in saying accuracy 0.4534136546184739
97 xnli_es median accuracy 0.4534136546184739
98 xnli_fr GPT-3 style accuracy 0.5248995983935743
99 xnli_fr MNLI crowdsource accuracy 0.3895582329317269
100 xnli_fr can we infer accuracy 0.5337349397590362
101 xnli_fr guaranteed/possible/impossible accuracy 0.42971887550200805
102 xnli_fr justified in saying accuracy 0.4738955823293173
103 xnli_fr median accuracy 0.4738955823293173
104 xnli_hi GPT-3 style accuracy 0.4983935742971888
105 xnli_hi MNLI crowdsource accuracy 0.38714859437751004
106 xnli_hi can we infer accuracy 0.45542168674698796
107 xnli_hi guaranteed/possible/impossible accuracy 0.41405622489959837
108 xnli_hi justified in saying accuracy 0.38795180722891565
109 xnli_hi median accuracy 0.41405622489959837
110 xnli_sw GPT-3 style accuracy 0.43493975903614457
111 xnli_sw MNLI crowdsource accuracy 0.363855421686747
112 xnli_sw can we infer accuracy 0.42891566265060244
113 xnli_sw guaranteed/possible/impossible accuracy 0.3457831325301205
114 xnli_sw justified in saying accuracy 0.3650602409638554
115 xnli_sw median accuracy 0.3650602409638554
116 xnli_ur GPT-3 style accuracy 0.43493975903614457
117 xnli_ur MNLI crowdsource accuracy 0.3895582329317269
118 xnli_ur can we infer accuracy 0.45180722891566266
119 xnli_ur guaranteed/possible/impossible accuracy 0.40120481927710844
120 xnli_ur justified in saying accuracy 0.37630522088353413
121 xnli_ur median accuracy 0.40120481927710844
122 xnli_vi GPT-3 style accuracy 0.5196787148594377
123 xnli_vi MNLI crowdsource accuracy 0.38112449799196785
124 xnli_vi can we infer accuracy 0.5080321285140562
125 xnli_vi guaranteed/possible/impossible accuracy 0.38393574297188754
126 xnli_vi justified in saying accuracy 0.43614457831325304
127 xnli_vi median accuracy 0.43614457831325304
128 xnli_zh GPT-3 style accuracy 0.5052208835341365
129 xnli_zh MNLI crowdsource accuracy 0.4
130 xnli_zh can we infer accuracy 0.5228915662650603
131 xnli_zh guaranteed/possible/impossible accuracy 0.4738955823293173
132 xnli_zh justified in saying accuracy 0.45863453815261046
133 xnli_zh median accuracy 0.4738955823293173
134 xstory_cloze_ar Answer Given options accuracy 0.7518199867637326
135 xstory_cloze_ar Choose Story Ending accuracy 0.7749834546657842
136 xstory_cloze_ar Generate Ending accuracy 0.586366644606221
137 xstory_cloze_ar Novel Correct Ending accuracy 0.7518199867637326
138 xstory_cloze_ar Story Continuation and Options accuracy 0.7438782263401721
139 xstory_cloze_ar median accuracy 0.7518199867637326
140 xstory_cloze_es Answer Given options accuracy 0.7835870284579749
141 xstory_cloze_es Choose Story Ending accuracy 0.8292521508934481
142 xstory_cloze_es Generate Ending accuracy 0.6399735274652548
143 xstory_cloze_es Novel Correct Ending accuracy 0.7935142289874255
144 xstory_cloze_es Story Continuation and Options accuracy 0.7888815354070152
145 xstory_cloze_es median accuracy 0.7888815354070152
146 xstory_cloze_eu Answer Given options accuracy 0.7041694242223693
147 xstory_cloze_eu Choose Story Ending accuracy 0.6823295830575777
148 xstory_cloze_eu Generate Ending accuracy 0.5625413633355394
149 xstory_cloze_eu Novel Correct Ending accuracy 0.6671078755790867
150 xstory_cloze_eu Story Continuation and Options accuracy 0.671740569159497
151 xstory_cloze_eu median accuracy 0.671740569159497
152 xstory_cloze_hi Answer Given options accuracy 0.6915949702183984
153 xstory_cloze_hi Choose Story Ending accuracy 0.7220383851753805
154 xstory_cloze_hi Generate Ending accuracy 0.5883520847121112
155 xstory_cloze_hi Novel Correct Ending accuracy 0.6743878226340172
156 xstory_cloze_hi Story Continuation and Options accuracy 0.6816677696889477
157 xstory_cloze_hi median accuracy 0.6816677696889477
158 xstory_cloze_id Answer Given options accuracy 0.7445400397088021
159 xstory_cloze_id Choose Story Ending accuracy 0.771012574454004
160 xstory_cloze_id Generate Ending accuracy 0.6029119788219722
161 xstory_cloze_id Novel Correct Ending accuracy 0.7485109199205824
162 xstory_cloze_id Story Continuation and Options accuracy 0.7438782263401721
163 xstory_cloze_id median accuracy 0.7445400397088021
164 xstory_cloze_zh Answer Given options accuracy 0.7610853739245532
165 xstory_cloze_zh Choose Story Ending accuracy 0.7961614824619457
166 xstory_cloze_zh Generate Ending accuracy 0.6214427531436135
167 xstory_cloze_zh Novel Correct Ending accuracy 0.7696889477167439
168 xstory_cloze_zh Story Continuation and Options accuracy 0.7670416942422237
169 xstory_cloze_zh median accuracy 0.7670416942422237
170 xwinograd_en Replace accuracy 0.5225806451612903
171 xwinograd_en True or False accuracy 0.48946236559139783
172 xwinograd_en does underscore refer to accuracy 0.5281720430107527
173 xwinograd_en stand for accuracy 0.5062365591397849
174 xwinograd_en underscore refer to accuracy 0.5372043010752688
175 xwinograd_en median accuracy 0.5225806451612903
176 xwinograd_fr Replace accuracy 0.5060240963855421
177 xwinograd_fr True or False accuracy 0.5421686746987951
178 xwinograd_fr does underscore refer to accuracy 0.5542168674698795
179 xwinograd_fr stand for accuracy 0.4819277108433735
180 xwinograd_fr underscore refer to accuracy 0.5301204819277109
181 xwinograd_fr median accuracy 0.5301204819277109
182 xwinograd_pt Replace accuracy 0.5133079847908745
183 xwinograd_pt True or False accuracy 0.4714828897338403
184 xwinograd_pt does underscore refer to accuracy 0.5209125475285171
185 xwinograd_pt stand for accuracy 0.5019011406844106
186 xwinograd_pt underscore refer to accuracy 0.5399239543726235
187 xwinograd_pt median accuracy 0.5133079847908745
188 xwinograd_zh Replace accuracy 0.5257936507936508
189 xwinograd_zh True or False accuracy 0.5297619047619048
190 xwinograd_zh does underscore refer to accuracy 0.5218253968253969
191 xwinograd_zh stand for accuracy 0.4444444444444444
192 xwinograd_zh underscore refer to accuracy 0.5198412698412699
193 xwinograd_zh median accuracy 0.5218253968253969
194 multiple average multiple 0.5631550819200618

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.8305718866916088
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.8706574024585783
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.7183324425440941
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.848743987172635
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.8466060929983966
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.625
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.08928571428571429
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.5892857142857143
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.5
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.5357142857142857
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "C1 or C2? premise, so/because\u2026",
"evaluation": {
"accuracy": 0.66
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise, so/because\u2026', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "C1 or C2? premise, so/because\u2026",
"evaluation": {
"accuracy": 0.66
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "best_option",
"evaluation": {
"accuracy": 0.67
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "cause_effect",
"evaluation": {
"accuracy": 0.78
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "i_am_hesitating",
"evaluation": {
"accuracy": 0.8
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "plausible_alternatives",
"evaluation": {
"accuracy": 0.81
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.7870036101083032
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.7220216606498195
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "does it follow that",
"evaluation": {
"accuracy": 0.6678700361010831
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does it follow that', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "guaranteed true",
"evaluation": {
"accuracy": 0.6714801444043321
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed true', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "should assume",
"evaluation": {
"accuracy": 0.6678700361010831
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='should assume', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5406471981057617
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.5074980268350434
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5177584846093133
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.510655090765588
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5256511444356748
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "C1 or C2? premise, so/because\u2026",
"evaluation": {
"accuracy": 0.47
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "best_option",
"evaluation": {
"accuracy": 0.51
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "cause_effect",
"evaluation": {
"accuracy": 0.65
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "i_am_hesitating",
"evaluation": {
"accuracy": 0.66
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "plausible_alternatives",
"evaluation": {
"accuracy": 0.67
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "sw",
"template_name": "C1 or C2? premise, so/because\u2026",
"evaluation": {
"accuracy": 0.58
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "sw",
"template_name": "best_option",
"evaluation": {
"accuracy": 0.57
},
"arguments": "Namespace(config_name=None, dataset_config_name='sw', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
}

Some files were not shown because too many files have changed in this diff Show More