初始化项目,由ModelHub XC社区提供模型
Model: bigscience/bloomz-7b1-p3 Source: Original Platform
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "a_good_translation-en-fr-source+target", "bleu": 2.125573406419127, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "a3a87505-e423-4c03-9a22-a3da4ccbeae5", "prompt_jinja": "Given the following source text in English: {{translation[\"en\"]}} , a good French translation is:\n||| {{translation[\"fr\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09981676122698169}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "a_good_translation-en-fr-target", "bleu": 1.5697853682886957, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "474c20a1-a2ea-4ff4-b4c8-7f9c6466ff20", "prompt_jinja": "Given the following passage: {{translation[\"en\"]}} , a good French translation is: ||| {{translation[\"fr\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10176333685236229}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "a_good_translation-fr-en-source+target", "bleu": 30.388346190168132, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "43dc1b77-e8ea-4dc8-8a12-0abc3b0dbba0", "prompt_jinja": "Given the following source text in French: {{translation[\"fr\"]}} , a good English translation is: ||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.28706919566129924}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "a_good_translation-fr-en-target", "bleu": 22.361703612398195, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "762c0878-c8fc-43ec-839f-d5d8435a94f6", "prompt_jinja": "Given the following passage: {{translation[\"fr\"]}} , a good English translation is:\n||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.43872418791072576}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "gpt3-en-fr", "bleu": 0.37928468482204986, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "fc3b96b0-de5e-4ff4-b7bb-cda348ff7fcf", "prompt_jinja": "Q: What is the French translation of {{translation[\"en\"]}} A: ||| {{translation[\"fr\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03833854862936989}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "gpt3-fr-en", "bleu": 17.167001660570335, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "gc3b96b0-de5e-4ff4-b7bb-cda348ff7fcf", "prompt_jinja": "Q: What is the English translation of {{translation[\"fr\"]}} A: ||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.3999014258297822}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "version-en-fr-target", "bleu": 4.788559958687529, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "c80e443a-0ba4-4c5d-be98-998e050a202d", "prompt_jinja": "If the original version says: {{translation[\"en\"]}}; then the French version should say:\n||| {{translation[\"fr\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12647149552786194}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "version-fr-en-target", "bleu": 23.925613843737143, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "9fe6b44b-2dc6-4557-8201-14d6ea7668ff", "prompt_jinja": "If the original version says: {{translation[\"fr\"]}}; then the English version should say:\n||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.2692548707999714}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "xglm-en-fr-target", "bleu": 2.186171298454336, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "2fc841fb-b872-4cc6-9a88-735d6bb7e2e3", "prompt_jinja": "{{translation[\"en\"]}} = French:\n||| {{translation[\"fr\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09641163271059554}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_fr_en", "prompt_name": "xglm-fr-en-target", "bleu": 14.10190003658709, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "fr-en", "subset": null, "prompt_id": "957b8554-a00a-4652-b080-e9ee3ccae381", "prompt_jinja": "{{translation[\"fr\"]}} = English:\n||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1974741324240151}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "a_good_translation-en-hi-source+target", "bleu": 0.18051438917625368, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "d1e354a7-8fa3-415a-9bb7-755e1ae21813", "prompt_jinja": "Given the following source text in English: {{translation[\"en\"]}} , a good Hindi translation is:\n||| {{translation[\"hi\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03338441915097909}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "a_good_translation-en-hi-target", "bleu": 0.1812629246502659, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "42379c42-04c5-4ea9-99ca-f43f1b1cfc1b", "prompt_jinja": "Given the following passage: {{translation[\"en\"]}} , a good Hindi translation is: ||| {{translation[\"hi\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04198901460363051}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "a_good_translation-hi-en-source+target", "bleu": 16.056644593701627, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "03664fac-67ef-414d-8e4a-504ad4d7a8a0", "prompt_jinja": "Given the following source text in Hindi: {{translation[\"hi\"]}} , a good English translation is: ||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.2809620281933667}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "a_good_translation-hi-en-target", "bleu": 15.032491079468809, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "fbd2d598-80e9-4ce6-b85e-fb269aa82580", "prompt_jinja": "Given the following passage: {{translation[\"hi\"]}} , a good English translation is:\n||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.2628594862835867}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "version-en-hi-target", "bleu": 0.1858574511075315, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "b5952cac-9388-4901-98ed-c45cccfed5de", "prompt_jinja": "If the original version says: {{translation[\"en\"]}}; then the Hindi version should say:\n||| {{translation[\"hi\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.029122685049572238}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "version-hi-en-target", "bleu": 15.167071858881462, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "02ec7175-a97c-4c0f-982f-1cc8c4c050d1", "prompt_jinja": "If the original version says: {{translation[\"hi\"]}}; then the English version should say:\n||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.2573529636593602}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "xglm-en-hi-target", "bleu": 0.002225608801197892, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "25daf37e-e684-419e-a250-bdeeb82a7df6", "prompt_jinja": "{{translation[\"en\"]}} = Hindi:\n||| {{translation[\"hi\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0005988947090265846}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1 @@
|
||||
{"results": [{"task_name": "wmt14_hi_en", "prompt_name": "xglm-hi-en-target", "bleu": 3.675518735361532, "fixed_answer_choice_list": null, "dataset_path": "wmt14", "dataset_name": "hi-en", "subset": null, "prompt_id": "9b430f52-31a1-4b7b-9600-59069a706b2c", "prompt_jinja": "{{translation[\"hi\"]}} = English:\n||| {{translation[\"en\"]}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.17101231729659816}], "config": {"model": "hf-causal", "model_args": "pretrained=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,use_accelerate=True,tokenizer=/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000,dtype=float16", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "prompt_body_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6176
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "prompt_review_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5592
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "prompt_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3922
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "prompt_body_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5526
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "prompt_review_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5296
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "prompt_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3646
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "prompt_body_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5332
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "prompt_review_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5182
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "prompt_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3644
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "prompt_body_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5174
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "prompt_review_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5006
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "amazon_reviews_multi",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "prompt_title_to_star",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3874
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "aqua_rat",
|
||||
"dataset_config_name": "raw",
|
||||
"template_name": "Answer questions from options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.24015748031496062
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer questions from options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "aqua_rat",
|
||||
"dataset_config_name": "raw",
|
||||
"template_name": "answer_quiz",
|
||||
"evaluation": {
|
||||
"accuracy": 0.22440944881889763
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='answer_quiz', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "aqua_rat",
|
||||
"dataset_config_name": "raw",
|
||||
"template_name": "select_the_best_option",
|
||||
"evaluation": {
|
||||
"accuracy": 0.2559055118110236
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='select_the_best_option', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "art",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "choose_hypothesis",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5926892950391645
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "art",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "choose_hypothesis_believable",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5711488250652742
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_believable', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "art",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "choose_hypothesis_desc",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5169712793733682
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_desc', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "art",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "choose_hypothesis_likely",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5300261096605744
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_likely', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "art",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "choose_hypothesis_options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5672323759791122
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "banking77",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "direct_to_which_department",
|
||||
"evaluation": {
|
||||
"accuracy": 0.16753246753246753
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='direct_to_which_department', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "banking77",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "help_page_topic",
|
||||
"evaluation": {
|
||||
"accuracy": 0.26785714285714285
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='help_page_topic', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "banking77",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "rephrase_as_banking_term",
|
||||
"evaluation": {
|
||||
"accuracy": 0.274025974025974
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='rephrase_as_banking_term', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blbooksgenre",
|
||||
"dataset_config_name": "title_genre_classifiction",
|
||||
"template_name": "classify",
|
||||
"evaluation": {
|
||||
"accuracy": 0.25057603686635943
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='classify', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blbooksgenre",
|
||||
"dataset_config_name": "title_genre_classifiction",
|
||||
"template_name": "multi-choice",
|
||||
"evaluation": {
|
||||
"accuracy": 0.25057603686635943
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='multi-choice', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blbooksgenre",
|
||||
"dataset_config_name": "title_genre_classifiction",
|
||||
"template_name": "premise_context_first",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7321428571428571
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='premise_context_first', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blimp",
|
||||
"dataset_config_name": "adjunct_island",
|
||||
"template_name": "grammatical_between_1_2",
|
||||
"evaluation": {
|
||||
"accuracy": 0.512
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_between_1_2', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blimp",
|
||||
"dataset_config_name": "adjunct_island",
|
||||
"template_name": "grammatical_between_A_B",
|
||||
"evaluation": {
|
||||
"accuracy": 0.464
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_between_A_B', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blimp",
|
||||
"dataset_config_name": "adjunct_island",
|
||||
"template_name": "grammatical_which_one_1_2",
|
||||
"evaluation": {
|
||||
"accuracy": 0.512
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_which_one_1_2', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blimp",
|
||||
"dataset_config_name": "adjunct_island",
|
||||
"template_name": "single_sentence_bad_yes_no",
|
||||
"evaluation": {
|
||||
"accuracy": 0.52
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='single_sentence_bad_yes_no', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "blimp",
|
||||
"dataset_config_name": "adjunct_island",
|
||||
"template_name": "single_sentence_good_yes_no",
|
||||
"evaluation": {
|
||||
"accuracy": 0.493
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='single_sentence_good_yes_no', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "climate_fever",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "claim_and_all_supporting_evidences",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3166123778501629
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='claim_and_all_supporting_evidences', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "climate_fever",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "fifth_evidence_and_claim_itemization",
|
||||
"evaluation": {
|
||||
"accuracy": 0.4749185667752443
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='fifth_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "climate_fever",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "first_evidence_and_claim_itemization",
|
||||
"evaluation": {
|
||||
"accuracy": 0.22996742671009773
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='first_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "climate_fever",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "second_evidence_and_claim_itemization",
|
||||
"evaluation": {
|
||||
"accuracy": 0.24625407166123778
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='second_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "climate_fever",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "third_evidence_claim_pair",
|
||||
"evaluation": {
|
||||
"accuracy": 0.24234527687296417
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='third_evidence_claim_pair', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "codah",
|
||||
"dataset_config_name": "codah",
|
||||
"template_name": "affirmative_instruction_after_sentence_and_choices",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6693083573487032
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='affirmative_instruction_after_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "codah",
|
||||
"dataset_config_name": "codah",
|
||||
"template_name": "affirmative_instruction_before_sentence_and_choices",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6509365994236311
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='affirmative_instruction_before_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "codah",
|
||||
"dataset_config_name": "codah",
|
||||
"template_name": "interrogative_instruction_after_sentence_and_choices",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6761527377521613
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='interrogative_instruction_after_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "commonsense_qa",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "answer_given_question_without_options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6388206388206388
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='answer_given_question_without_options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "commonsense_qa",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "most_suitable_answer",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7313677313677314
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='most_suitable_answer', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "commonsense_qa",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "question_answering",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7158067158067158
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='question_answering', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "conv_ai_3",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "ambiguous",
|
||||
"evaluation": {
|
||||
"accuracy": 0.39040207522697795
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='ambiguous', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "conv_ai_3",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "clarification_needed",
|
||||
"evaluation": {
|
||||
"accuracy": 0.39040207522697795
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='clarification_needed', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "conv_ai_3",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "directly_answer",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6095979247730221
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='directly_answer', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "conv_ai_3",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "score_give_number",
|
||||
"evaluation": {
|
||||
"accuracy": 0.057933419801124084
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='score_give_number', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "conv_ai_3",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "score_how_much",
|
||||
"evaluation": {
|
||||
"accuracy": 0.010376134889753566
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='score_how_much', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "craigslist_bargains",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "best deal",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5192629815745393
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='craigslist_bargains', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best deal', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "craigslist_bargains",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "good deal for seller",
|
||||
"evaluation": {
|
||||
"accuracy": 0.2529313232830821
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='craigslist_bargains', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='good deal for seller', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "craigslist_bargains",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "good deal for seller no list price",
|
||||
"evaluation": {
|
||||
"accuracy": 0.09715242881072027
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='craigslist_bargains', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='good deal for seller no list price', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "craigslist_bargains",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "good deal for seller no list price implicit",
|
||||
"evaluation": {
|
||||
"accuracy": 0.24623115577889448
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='craigslist_bargains', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='good deal for seller no list price implicit', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "emotion",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "answer_question_with_emotion_label",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3375
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='emotion', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='answer_question_with_emotion_label', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "emotion",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "answer_with_class_label",
|
||||
"evaluation": {
|
||||
"accuracy": 0.214
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='emotion', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='answer_with_class_label', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "emotion",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "choose_the_best_emotion_label",
|
||||
"evaluation": {
|
||||
"accuracy": 0.312
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='emotion', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='choose_the_best_emotion_label', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "emotion",
|
||||
"dataset_config_name": null,
|
||||
"template_name": "reply_with_emoation_label",
|
||||
"evaluation": {
|
||||
"accuracy": 0.4495
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='emotion', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='reply_with_emoation_label', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bd372ab32cfb273ee9a6e526c9d99cefa19711eb95cfa5f5b6f2c1287312e0d2
|
||||
size 8972520
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:95244bede560e3967e010b62bdc07bcccb7286ba9ec91f0ba7319010de183ecc
|
||||
size 9028288
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ce82a11301357ee05915d24a130a33931deb425ad8baa4189fe628b588100854
|
||||
size 9272950
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f1faa39e152afdb8123e501b0b08280e7be04fad2b944bbff9aed8b063c0156d
|
||||
size 3114901
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8fe88f3b501cf933a7d15898f3d0b307725981ada99a2d501a034e7e17ca98dd
|
||||
size 2973455
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ab6fd8d0a6fcb0fc245cf1ac3b57c03781fad7c39b6f77975c0a5d53a21649a0
|
||||
size 3111377
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ca69af50902f2ecd90e2b41201c3712e27fce9c6b7158bd65f46d3c4b5b1eba7
|
||||
size 3034252
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:15956ffeb8e1ec731e2900fd68204d3936d31807e64a29d0712f3357f56957cc
|
||||
size 2613841
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:89d9d602d726bb0821c710b7b32b63c2813c0116d03965059be24fd5923ae79d
|
||||
size 2722728
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a48cc4625e95ae013239f8c20fc859392d9a2ed2a346150b2fdd6063c2e870a0
|
||||
size 3020414
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c18300f469840d070cc8d39fae6ab0df82bde890c5eef10b6e6767792381478
|
||||
size 3013171
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0cae8ada7278a7273ac2ddebe6f4f3b2a909f4b02f43ea0c4927c432eb2ede13
|
||||
size 2473909
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:562f7049dd5e17a2eabc42030da658f4c3176ccd75bac3297055aea791c86587
|
||||
size 2579931
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9fe075dbd2400c25596e0f07943d2ef01c0cb29728ea2fea9074bde069e83a76
|
||||
size 2951839
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e96ffbc6423c60e7ff34c31e28f3ea95ea7e9e79c71c2bf6cd0a094ac8592d1a
|
||||
size 2820700
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c0df7c80e772bfeff18b8e494ac8600ea0b1f1d1b9e05a2cd7b306495d6ec4e0
|
||||
size 2950072
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:769fd47551c4c4cd8692f52ae39e1982fe04c8c84da0412501dc351a166d130f
|
||||
size 2870274
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d495b77461e9eca518a4cb42f37eba56ad258c201ada004636a64a0409c69f66
|
||||
size 2863552
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2b6cdba90d5b9da0850021ed44f26a0f849022b96d9dc85afa3ae5262eb08ee7
|
||||
size 2865803
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0c3871387b7d5f67a59022d57cb99409a187770143a22713fde2d44809364827
|
||||
size 2378711
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4f4ebb076e4c2fabd5a2ea2105f9fa3d589c7a76e41e169aa02e776cbd92f08c
|
||||
size 2446695
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "financial_phrasebank",
|
||||
"dataset_config_name": "sentences_allagree",
|
||||
"template_name": "bullish_neutral_bearish",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3878091872791519
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='sentences_allagree', dataset_name='financial_phrasebank', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='bullish_neutral_bearish', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "financial_phrasebank",
|
||||
"dataset_config_name": "sentences_allagree",
|
||||
"template_name": "complementary_industries",
|
||||
"evaluation": {
|
||||
"accuracy": 0.10114840989399293
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='sentences_allagree', dataset_name='financial_phrasebank', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='complementary_industries', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "financial_phrasebank",
|
||||
"dataset_config_name": "sentences_allagree",
|
||||
"template_name": "sentiment",
|
||||
"evaluation": {
|
||||
"accuracy": 0.35644876325088337
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='sentences_allagree', dataset_name='financial_phrasebank', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=4, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='sentiment', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user