初始化项目,由ModelHub XC社区提供模型

Model: bigscience/bloomz-7b1-p3
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-15 07:40:14 +08:00
commit 78a6661ff1
634 changed files with 7477 additions and 0 deletions

75
.gitattributes vendored Normal file
View File

@@ -0,0 +1,75 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*.tfevents* filter=lfs diff=lfs merge=lfs -text
*.db* filter=lfs diff=lfs merge=lfs -text
*.ark* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.gguf* filter=lfs diff=lfs merge=lfs -text
*.ggml filter=lfs diff=lfs merge=lfs -text
*.llamafile* filter=lfs diff=lfs merge=lfs -text
*.pt2 filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl filter=lfs diff=lfs merge=lfs -text
evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl filter=lfs diff=lfs merge=lfs -text
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
model.safetensors filter=lfs diff=lfs merge=lfs -text

882
README.md Normal file
View File

@@ -0,0 +1,882 @@
---
datasets:
- Muennighoff/P3
license: bigscience-bloom-rail-1.0
language:
- ak
- ar
- as
- bm
- bn
- ca
- code
- en
- es
- eu
- fon
- fr
- gu
- hi
- id
- ig
- ki
- kn
- lg
- ln
- ml
- mr
- ne
- nso
- ny
- or
- pa
- pt
- rn
- rw
- sn
- st
- sw
- ta
- te
- tn
- ts
- tum
- tw
- ur
- vi
- wo
- xh
- yo
- zh
- zu
programming_language:
- C
- C++
- C#
- Go
- Java
- JavaScript
- Lua
- PHP
- Python
- Ruby
- Rust
- Scala
- TypeScript
pipeline_tag: text-generation
widget:
- text: "一个传奇的开端一个不灭的神话这不仅仅是一部电影而是作为一个走进新时代的标签永远彪炳史册。Would you rate the previous review as positive, neutral or negative?"
example_title: "zh-en sentiment"
- text: "一个传奇的开端,一个不灭的神话,这不仅仅是一部电影,而是作为一个走进新时代的标签,永远彪炳史册。你认为这句话的立场是赞扬、中立还是批评?"
example_title: "zh-zh sentiment"
- text: "Suggest at least five related search terms to \"Mạng neural nhân tạo\"."
example_title: "vi-en query"
- text: "Proposez au moins cinq mots clés concernant «Réseau de neurones artificiels»."
example_title: "fr-fr query"
- text: "Explain in a sentence in Telugu what is backpropagation in neural networks."
example_title: "te-en qa"
- text: "Why is the sky blue?"
example_title: "en-en qa"
- text: "Write a fairy tale about a troll saving a princess from a dangerous dragon. The fairy tale is a masterpiece that has achieved praise worldwide and its moral is \"Heroes Come in All Shapes and Sizes\". Story (in Spanish):"
example_title: "es-en fable"
- text: "Write a fable about wood elves living in a forest that is suddenly invaded by ogres. The fable is a masterpiece that has achieved praise worldwide and its moral is \"Violence is the last refuge of the incompetent\". Fable (in Hindi):"
example_title: "hi-en fable"
model-index:
- name: bloomz-7b1-p3
results:
- task:
type: Coreference resolution
dataset:
type: winogrande
name: Winogrande XL (xl)
config: xl
split: validation
revision: a80f460359d1e9a67c006011c94de42a8759430c
metrics:
- type: Accuracy
value: 54.06
- task:
type: Coreference resolution
dataset:
type: Muennighoff/xwinograd
name: XWinograd (en)
config: en
split: test
revision: 9dd5ea5505fad86b7bedad667955577815300cee
metrics:
- type: Accuracy
value: 53.72
- task:
type: Coreference resolution
dataset:
type: Muennighoff/xwinograd
name: XWinograd (fr)
config: fr
split: test
revision: 9dd5ea5505fad86b7bedad667955577815300cee
metrics:
- type: Accuracy
value: 55.42
- task:
type: Coreference resolution
dataset:
type: Muennighoff/xwinograd
name: XWinograd (jp)
config: jp
split: test
revision: 9dd5ea5505fad86b7bedad667955577815300cee
metrics:
- type: Accuracy
value: 51.93
- task:
type: Coreference resolution
dataset:
type: Muennighoff/xwinograd
name: XWinograd (pt)
config: pt
split: test
revision: 9dd5ea5505fad86b7bedad667955577815300cee
metrics:
- type: Accuracy
value: 53.99
- task:
type: Coreference resolution
dataset:
type: Muennighoff/xwinograd
name: XWinograd (ru)
config: ru
split: test
revision: 9dd5ea5505fad86b7bedad667955577815300cee
metrics:
- type: Accuracy
value: 53.97
- task:
type: Coreference resolution
dataset:
type: Muennighoff/xwinograd
name: XWinograd (zh)
config: zh
split: test
revision: 9dd5ea5505fad86b7bedad667955577815300cee
metrics:
- type: Accuracy
value: 52.98
- task:
type: Natural language inference
dataset:
type: anli
name: ANLI (r1)
config: r1
split: validation
revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
metrics:
- type: Accuracy
value: 35.1
- task:
type: Natural language inference
dataset:
type: anli
name: ANLI (r2)
config: r2
split: validation
revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
metrics:
- type: Accuracy
value: 35.4
- task:
type: Natural language inference
dataset:
type: anli
name: ANLI (r3)
config: r3
split: validation
revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
metrics:
- type: Accuracy
value: 37.58
- task:
type: Natural language inference
dataset:
type: super_glue
name: SuperGLUE (cb)
config: cb
split: validation
revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
metrics:
- type: Accuracy
value: 62.5
- task:
type: Natural language inference
dataset:
type: super_glue
name: SuperGLUE (rte)
config: rte
split: validation
revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
metrics:
- type: Accuracy
value: 78.7
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (ar)
config: ar
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 50.64
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (bg)
config: bg
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 43.98
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (de)
config: de
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 47.03
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (el)
config: el
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 41.89
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (en)
config: en
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 55.9
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (es)
config: es
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 53.73
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (fr)
config: fr
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 53.37
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (hi)
config: hi
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 49.84
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (ru)
config: ru
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 46.55
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (sw)
config: sw
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 43.49
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (th)
config: th
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 43.17
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (tr)
config: tr
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 40.44
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (ur)
config: ur
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 45.18
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (vi)
config: vi
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 51.97
- task:
type: Natural language inference
dataset:
type: xnli
name: XNLI (zh)
config: zh
split: validation
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
metrics:
- type: Accuracy
value: 52.29
- task:
type: Program synthesis
dataset:
type: openai_humaneval
name: HumanEval
config: None
split: test
revision: e8dc562f5de170c54b5481011dd9f4fa04845771
metrics:
- type: Pass@1
value: 1.55
- type: Pass@10
value: 4.12
- type: Pass@100
value: 9.60
- task:
type: Sentence completion
dataset:
type: story_cloze
name: StoryCloze (2016)
config: "2016"
split: validation
revision: e724c6f8cdf7c7a2fb229d862226e15b023ee4db
metrics:
- type: Accuracy
value: 87.07
- task:
type: Sentence completion
dataset:
type: super_glue
name: SuperGLUE (copa)
config: copa
split: validation
revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
metrics:
- type: Accuracy
value: 81.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (et)
config: et
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 57.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (ht)
config: ht
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 56.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (id)
config: id
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 70.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (it)
config: it
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 60.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (qu)
config: qu
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 54.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (sw)
config: sw
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 62.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (ta)
config: ta
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 71.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (th)
config: th
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 63.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (tr)
config: tr
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 58.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (vi)
config: vi
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 67.0
- task:
type: Sentence completion
dataset:
type: xcopa
name: XCOPA (zh)
config: zh
split: validation
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
metrics:
- type: Accuracy
value: 79.0
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (ar)
config: ar
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 78.69
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (es)
config: es
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 82.93
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (eu)
config: eu
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 70.42
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (hi)
config: hi
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 72.2
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (id)
config: id
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 77.1
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (my)
config: my
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 51.49
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (ru)
config: ru
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 66.45
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (sw)
config: sw
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 60.82
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (te)
config: te
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 63.14
- task:
type: Sentence completion
dataset:
type: Muennighoff/xstory_cloze
name: XStoryCloze (zh)
config: zh
split: validation
revision: 8bb76e594b68147f1a430e86829d07189622b90d
metrics:
- type: Accuracy
value: 80.34
---
![xmtf](https://github.com/bigscience-workshop/xmtf/blob/master/xmtf_banner.png?raw=true)
# Table of Contents
1. [Model Summary](#model-summary)
2. [Use](#use)
3. [Limitations](#limitations)
4. [Training](#training)
5. [Evaluation](#evaluation)
6. [Citation](#citation)
# Model Summary
> We present BLOOMZ & mT0, a family of models capable of following human instructions in dozens of languages zero-shot. We finetune BLOOM & mT5 pretrained multilingual language models on our crosslingual task mixture (xP3) and find the resulting models capable of crosslingual generalization to unseen tasks & languages.
- **Repository:** [bigscience-workshop/xmtf](https://github.com/bigscience-workshop/xmtf)
- **Paper:** [Crosslingual Generalization through Multitask Finetuning](https://arxiv.org/abs/2211.01786)
- **Point of Contact:** [Niklas Muennighoff](mailto:niklas@hf.co)
- **Languages:** Refer to [bloom](https://huggingface.co/bigscience/bloom) for pretraining & [xP3](https://huggingface.co/datasets/bigscience/xP3) for finetuning language proportions. It understands both pretraining & finetuning languages.
- **BLOOMZ & mT0 Model Family:**
<div class="max-w-full overflow-auto">
<table>
<tr>
<th colspan="12">Multitask finetuned on <a style="font-weight:bold" href=https://huggingface.co/datasets/bigscience/xP3>xP3</a>. Recommended for prompting in English.</th>
</tr>
<tr>
<td>Parameters</td>
<td>300M</td>
<td>580M</td>
<td>1.2B</td>
<td>3.7B</td>
<td>13B</td>
<td>560M</td>
<td>1.1B</td>
<td>1.7B</td>
<td>3B</td>
<td>7.1B</td>
<td>176B</td>
</tr>
<tr>
<td>Finetuned Model</td>
<td><a href=https://huggingface.co/bigscience/mt0-small>mt0-small</a></td>
<td><a href=https://huggingface.co/bigscience/mt0-base>mt0-base</a></td>
<td><a href=https://huggingface.co/bigscience/mt0-large>mt0-large</a></td>
<td><a href=https://huggingface.co/bigscience/mt0-xl>mt0-xl</a></td>
<td><a href=https://huggingface.co/bigscience/mt0-xxl>mt0-xxl</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz-560m>bloomz-560m</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz-1b1>bloomz-1b1</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz-1b7>bloomz-1b7</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz-3b>bloomz-3b</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz-7b1>bloomz-7b1</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz>bloomz</a></td>
</tr>
<tr>
<th colspan="12">Multitask finetuned on <a style="font-weight:bold" href=https://huggingface.co/datasets/bigscience/xP3mt>xP3mt</a>. Recommended for prompting in non-English.</th>
</tr>
<tr>
<td>Finetuned Model</td>
<td></td>
<td></td>
<td></td>
<td></td>
<td><a href=https://huggingface.co/bigscience/mt0-xxl-mt>mt0-xxl-mt</a></td>
<td></td>
<td></td>
<td></td>
<td></td>
<td><a href=https://huggingface.co/bigscience/bloomz-7b1-mt>bloomz-7b1-mt</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz-mt>bloomz-mt</a></td>
</tr>
<tr>
<th colspan="12">Multitask finetuned on <a style="font-weight:bold" href=https://huggingface.co/datasets/Muennighoff/P3>P3</a>. Released for research purposes only. Strictly inferior to above models!</th>
</tr>
<tr>
<td>Finetuned Model</td>
<td></td>
<td></td>
<td></td>
<td></td>
<td><a href=https://huggingface.co/bigscience/mt0-xxl-p3>mt0-xxl-p3</a></td>
<td></td>
<td></td>
<td></td>
<td></td>
<td><a href=https://huggingface.co/bigscience/bloomz-7b1-p3>bloomz-7b1-p3</a></td>
<td><a href=https://huggingface.co/bigscience/bloomz-p3>bloomz-p3</a></td>
</tr>
<tr>
<th colspan="12">Original pretrained checkpoints. Not recommended.</th>
</tr>
<tr>
<td>Pretrained Model</td>
<td><a href=https://huggingface.co/google/mt5-small>mt5-small</a></td>
<td><a href=https://huggingface.co/google/mt5-base>mt5-base</a></td>
<td><a href=https://huggingface.co/google/mt5-large>mt5-large</a></td>
<td><a href=https://huggingface.co/google/mt5-xl>mt5-xl</a></td>
<td><a href=https://huggingface.co/google/mt5-xxl>mt5-xxl</a></td>
<td><a href=https://huggingface.co/bigscience/bloom-560m>bloom-560m</a></td>
<td><a href=https://huggingface.co/bigscience/bloom-1b1>bloom-1b1</a></td>
<td><a href=https://huggingface.co/bigscience/bloom-1b7>bloom-1b7</a></td>
<td><a href=https://huggingface.co/bigscience/bloom-3b>bloom-3b</a></td>
<td><a href=https://huggingface.co/bigscience/bloom-7b1>bloom-7b1</a></td>
<td><a href=https://huggingface.co/bigscience/bloom>bloom</a></td>
</tr>
</table>
</div>
# Use
## Intended use
We recommend using the model to perform tasks expressed in natural language. For example, given the prompt "*Translate to English: Je t'aime.*", the model will most likely answer "*I love you.*". Some prompt ideas from our paper:
- 一个传奇的开端,一个不灭的神话,这不仅仅是一部电影,而是作为一个走进新时代的标签,永远彪炳史册。你认为这句话的立场是赞扬、中立还是批评?
- Suggest at least five related search terms to "Mạng neural nhân tạo".
- Write a fairy tale about a troll saving a princess from a dangerous dragon. The fairy tale is a masterpiece that has achieved praise worldwide and its moral is "Heroes Come in All Shapes and Sizes". Story (in Spanish):
- Explain in a sentence in Telugu what is backpropagation in neural networks.
**Feel free to share your generations in the Community tab!**
## How to use
### CPU
<details>
<summary> Click to expand </summary>
```python
# pip install -q transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "bigscience/bloomz-7b1-p3"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
inputs = tokenizer.encode("Translate to English: Je t'aime.", return_tensors="pt")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```
</details>
### GPU
<details>
<summary> Click to expand </summary>
```python
# pip install -q transformers accelerate
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "bigscience/bloomz-7b1-p3"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype="auto", device_map="auto")
inputs = tokenizer.encode("Translate to English: Je t'aime.", return_tensors="pt").to("cuda")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```
</details>
### GPU in 8bit
<details>
<summary> Click to expand </summary>
```python
# pip install -q transformers accelerate bitsandbytes
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "bigscience/bloomz-7b1-p3"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto", load_in_8bit=True)
inputs = tokenizer.encode("Translate to English: Je t'aime.", return_tensors="pt").to("cuda")
outputs = model.generate(inputs)
print(tokenizer.decode(outputs[0]))
```
</details>
<!-- Necessary for whitespace -->
###
# Limitations
**Prompt Engineering:** The performance may vary depending on the prompt. For BLOOMZ models, we recommend making it very clear when the input stops to avoid the model trying to continue it. For example, the prompt "*Translate to English: Je t'aime*" without the full stop (.) at the end, may result in the model trying to continue the French sentence. Better prompts are e.g. "*Translate to English: Je t'aime.*", "*Translate to English: Je t'aime. Translation:*", "*What is "Je t'aime." in English?*", where it is clear to the model when it should answer. Further, we recommend providing the model as much context as possible. For example, if you want it to answer in Telugu, then tell the model, e.g. "*Explain in a sentence in Telugu what is backpropagation in neural networks.*".
# Training
## Model
- **Architecture:** Same as [bloom-7b1](https://huggingface.co/bigscience/bloom-7b1), also refer to the `config.json` file
- **Finetuning steps:** 1000
- **Finetuning tokens:** 4.19 billion
- **Finetuning layout:** 1x pipeline parallel, 1x tensor parallel, 64x data parallel
- **Precision:** float16
## Hardware
- **CPUs:** AMD CPUs with 512GB memory per node
- **GPUs:** 64 A100 80GB GPUs with 8 GPUs per node (8 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links
- **Communication:** NCCL-communications network with a fully dedicated subnet
## Software
- **Orchestration:** [Megatron-DeepSpeed](https://github.com/bigscience-workshop/Megatron-DeepSpeed)
- **Optimizer & parallelism:** [DeepSpeed](https://github.com/microsoft/DeepSpeed)
- **Neural networks:** [PyTorch](https://github.com/pytorch/pytorch) (pytorch-1.11 w/ CUDA-11.5)
- **FP16 if applicable:** [apex](https://github.com/NVIDIA/apex)
# Evaluation
We refer to Table 7 from our [paper](https://arxiv.org/abs/2211.01786) & [bigscience/evaluation-results](https://huggingface.co/datasets/bigscience/evaluation-results) for zero-shot results on unseen tasks. The sidebar reports zero-shot performance of the best prompt per dataset config.
# Citation
```bibtex
@misc{muennighoff2022crosslingual,
title={Crosslingual Generalization through Multitask Finetuning},
author={Niklas Muennighoff and Thomas Wang and Lintang Sutawika and Adam Roberts and Stella Biderman and Teven Le Scao and M Saiful Bari and Sheng Shen and Zheng-Xin Yong and Hailey Schoelkopf and Xiangru Tang and Dragomir Radev and Alham Fikri Aji and Khalid Almubarak and Samuel Albanie and Zaid Alyafeai and Albert Webson and Edward Raff and Colin Raffel},
year={2022},
eprint={2211.01786},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```

31
config.json Normal file
View File

@@ -0,0 +1,31 @@
{
"apply_residual_connection_post_layernorm": false,
"architectures": [
"BloomForCausalLM"
],
"attention_dropout": 0.0,
"attention_softmax_in_fp32": true,
"bias_dropout_fusion": true,
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_dropout": 0.0,
"initializer_range": 0.02,
"layer_norm_epsilon": 1e-05,
"masked_softmax_fusion": true,
"model_type": "bloom",
"n_embed": 4096,
"n_inner": null,
"n_layer": 30,
"num_attention_heads": 32,
"offset_alibi": 100,
"pad_token_id": 3,
"pretraining_tp": 4,
"seq_length": 2048,
"skip_bias_add": true,
"skip_bias_add_qkv": false,
"slow_but_exact": false,
"transformers_version": "4.21.0.dev0",
"unk_token_id": 0,
"use_cache": true,
"vocab_size": 250880
}

1
configuration.json Normal file
View File

@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7518199867637326
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.7749834546657842
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.586366644606221
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7518199867637326
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "ar",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7438782263401721
},
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7835870284579749
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.8292521508934481
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.6399735274652548
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7935142289874255
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "es",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7888815354070152
},
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7041694242223693
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.6823295830575777
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.5625413633355394
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.6671078755790867
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "eu",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.671740569159497
},
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.6915949702183984
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.7220383851753805
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.5883520847121112
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.6743878226340172
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "hi",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.6816677696889477
},
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7445400397088021
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.771012574454004
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.6029119788219722
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7485109199205824
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "id",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7438782263401721
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.7610853739245532
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.7961614824619457
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.6214427531436135
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.7696889477167439
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xstory_cloze",
"dataset_config_name": "zh",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.7670416942422237
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5225806451612903
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.48946236559139783
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5281720430107527
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.5062365591397849
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "en",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5372043010752688
},
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5060240963855421
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.5421686746987951
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5542168674698795
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.4819277108433735
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "fr",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5301204819277109
},
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5133079847908745
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.4714828897338403
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5209125475285171
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.5019011406844106
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "pt",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5399239543726235
},
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5257936507936508
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.5297619047619048
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5218253968253969
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.4444444444444444
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "Muennighoff/xwinograd",
"dataset_config_name": "zh",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5198412698412699
},
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.351
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.334
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.351
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.288
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r1",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.345
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.339
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.335
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.354
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.297
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r2",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.345
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.37583333333333335
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.3408333333333333
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.36333333333333334
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.31083333333333335
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "anli",
"dataset_config_name": "dev_r3",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.34
},
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,194 @@
dataset,prompt,metric,value
anli_dev_r1,GPT-3 style,accuracy,0.351
anli_dev_r1,MNLI crowdsource,accuracy,0.334
anli_dev_r1,can we infer,accuracy,0.351
anli_dev_r1,guaranteed/possible/impossible,accuracy,0.288
anli_dev_r1,justified in saying,accuracy,0.345
anli_dev_r1,median,accuracy,0.345
anli_dev_r2,GPT-3 style,accuracy,0.339
anli_dev_r2,MNLI crowdsource,accuracy,0.335
anli_dev_r2,can we infer,accuracy,0.354
anli_dev_r2,guaranteed/possible/impossible,accuracy,0.297
anli_dev_r2,justified in saying,accuracy,0.345
anli_dev_r2,median,accuracy,0.339
anli_dev_r3,GPT-3 style,accuracy,0.37583333333333335
anli_dev_r3,MNLI crowdsource,accuracy,0.3408333333333333
anli_dev_r3,can we infer,accuracy,0.36333333333333334
anli_dev_r3,guaranteed/possible/impossible,accuracy,0.31083333333333335
anli_dev_r3,justified in saying,accuracy,0.34
anli_dev_r3,median,accuracy,0.3408333333333333
story_cloze_2016,Answer Given options,accuracy,0.8305718866916088
story_cloze_2016,Choose Story Ending,accuracy,0.8706574024585783
story_cloze_2016,Generate Ending,accuracy,0.7183324425440941
story_cloze_2016,Novel Correct Ending,accuracy,0.848743987172635
story_cloze_2016,Story Continuation and Options,accuracy,0.8466060929983966
story_cloze_2016,median,accuracy,0.8466060929983966
super_glue_cb,GPT-3 style,accuracy,0.625
super_glue_cb,MNLI crowdsource,accuracy,0.08928571428571429
super_glue_cb,can we infer,accuracy,0.5892857142857143
super_glue_cb,guaranteed/possible/impossible,accuracy,0.5
super_glue_cb,justified in saying,accuracy,0.5357142857142857
super_glue_cb,median,accuracy,0.5357142857142857
super_glue_copa,"C1 or C2? premise, so/because…",accuracy,0.66
super_glue_copa,best_option,accuracy,0.67
super_glue_copa,cause_effect,accuracy,0.78
super_glue_copa,i_am_hesitating,accuracy,0.8
super_glue_copa,plausible_alternatives,accuracy,0.81
super_glue_copa,median,accuracy,0.78
super_glue_rte,GPT-3 style,accuracy,0.7870036101083032
super_glue_rte,MNLI crowdsource,accuracy,0.7220216606498195
super_glue_rte,does it follow that,accuracy,0.6678700361010831
super_glue_rte,guaranteed true,accuracy,0.6714801444043321
super_glue_rte,should assume,accuracy,0.6678700361010831
super_glue_rte,median,accuracy,0.6714801444043321
winogrande_winogrande_xl,Replace,accuracy,0.5406471981057617
winogrande_winogrande_xl,True or False,accuracy,0.5074980268350434
winogrande_winogrande_xl,does underscore refer to,accuracy,0.5177584846093133
winogrande_winogrande_xl,stand for,accuracy,0.510655090765588
winogrande_winogrande_xl,underscore refer to,accuracy,0.5256511444356748
winogrande_winogrande_xl,median,accuracy,0.5177584846093133
xcopa_id,"C1 or C2? premise, so/because…",accuracy,0.47
xcopa_id,best_option,accuracy,0.51
xcopa_id,cause_effect,accuracy,0.65
xcopa_id,i_am_hesitating,accuracy,0.66
xcopa_id,plausible_alternatives,accuracy,0.67
xcopa_id,median,accuracy,0.65
xcopa_sw,"C1 or C2? premise, so/because…",accuracy,0.58
xcopa_sw,best_option,accuracy,0.57
xcopa_sw,cause_effect,accuracy,0.46
xcopa_sw,i_am_hesitating,accuracy,0.48
xcopa_sw,plausible_alternatives,accuracy,0.45
xcopa_sw,median,accuracy,0.48
xcopa_ta,"C1 or C2? premise, so/because…",accuracy,0.57
xcopa_ta,best_option,accuracy,0.67
xcopa_ta,cause_effect,accuracy,0.71
xcopa_ta,i_am_hesitating,accuracy,0.71
xcopa_ta,plausible_alternatives,accuracy,0.69
xcopa_ta,median,accuracy,0.69
xcopa_vi,"C1 or C2? premise, so/because…",accuracy,0.55
xcopa_vi,best_option,accuracy,0.61
xcopa_vi,cause_effect,accuracy,0.67
xcopa_vi,i_am_hesitating,accuracy,0.66
xcopa_vi,plausible_alternatives,accuracy,0.65
xcopa_vi,median,accuracy,0.65
xcopa_zh,"C1 or C2? premise, so/because…",accuracy,0.62
xcopa_zh,best_option,accuracy,0.61
xcopa_zh,cause_effect,accuracy,0.77
xcopa_zh,i_am_hesitating,accuracy,0.72
xcopa_zh,plausible_alternatives,accuracy,0.74
xcopa_zh,median,accuracy,0.72
xnli_ar,GPT-3 style,accuracy,0.5040160642570282
xnli_ar,MNLI crowdsource,accuracy,0.39879518072289155
xnli_ar,can we infer,accuracy,0.506425702811245
xnli_ar,guaranteed/possible/impossible,accuracy,0.4799196787148594
xnli_ar,justified in saying,accuracy,0.41526104417670684
xnli_ar,median,accuracy,0.4799196787148594
xnli_en,GPT-3 style,accuracy,0.5590361445783133
xnli_en,MNLI crowdsource,accuracy,0.342570281124498
xnli_en,can we infer,accuracy,0.5449799196787148
xnli_en,guaranteed/possible/impossible,accuracy,0.41164658634538154
xnli_en,justified in saying,accuracy,0.4634538152610442
xnli_en,median,accuracy,0.4634538152610442
xnli_es,GPT-3 style,accuracy,0.5373493975903615
xnli_es,MNLI crowdsource,accuracy,0.40441767068273093
xnli_es,can we infer,accuracy,0.5277108433734939
xnli_es,guaranteed/possible/impossible,accuracy,0.44216867469879517
xnli_es,justified in saying,accuracy,0.4534136546184739
xnli_es,median,accuracy,0.4534136546184739
xnli_fr,GPT-3 style,accuracy,0.5248995983935743
xnli_fr,MNLI crowdsource,accuracy,0.3895582329317269
xnli_fr,can we infer,accuracy,0.5337349397590362
xnli_fr,guaranteed/possible/impossible,accuracy,0.42971887550200805
xnli_fr,justified in saying,accuracy,0.4738955823293173
xnli_fr,median,accuracy,0.4738955823293173
xnli_hi,GPT-3 style,accuracy,0.4983935742971888
xnli_hi,MNLI crowdsource,accuracy,0.38714859437751004
xnli_hi,can we infer,accuracy,0.45542168674698796
xnli_hi,guaranteed/possible/impossible,accuracy,0.41405622489959837
xnli_hi,justified in saying,accuracy,0.38795180722891565
xnli_hi,median,accuracy,0.41405622489959837
xnli_sw,GPT-3 style,accuracy,0.43493975903614457
xnli_sw,MNLI crowdsource,accuracy,0.363855421686747
xnli_sw,can we infer,accuracy,0.42891566265060244
xnli_sw,guaranteed/possible/impossible,accuracy,0.3457831325301205
xnli_sw,justified in saying,accuracy,0.3650602409638554
xnli_sw,median,accuracy,0.3650602409638554
xnli_ur,GPT-3 style,accuracy,0.43493975903614457
xnli_ur,MNLI crowdsource,accuracy,0.3895582329317269
xnli_ur,can we infer,accuracy,0.45180722891566266
xnli_ur,guaranteed/possible/impossible,accuracy,0.40120481927710844
xnli_ur,justified in saying,accuracy,0.37630522088353413
xnli_ur,median,accuracy,0.40120481927710844
xnli_vi,GPT-3 style,accuracy,0.5196787148594377
xnli_vi,MNLI crowdsource,accuracy,0.38112449799196785
xnli_vi,can we infer,accuracy,0.5080321285140562
xnli_vi,guaranteed/possible/impossible,accuracy,0.38393574297188754
xnli_vi,justified in saying,accuracy,0.43614457831325304
xnli_vi,median,accuracy,0.43614457831325304
xnli_zh,GPT-3 style,accuracy,0.5052208835341365
xnli_zh,MNLI crowdsource,accuracy,0.4
xnli_zh,can we infer,accuracy,0.5228915662650603
xnli_zh,guaranteed/possible/impossible,accuracy,0.4738955823293173
xnli_zh,justified in saying,accuracy,0.45863453815261046
xnli_zh,median,accuracy,0.4738955823293173
xstory_cloze_ar,Answer Given options,accuracy,0.7518199867637326
xstory_cloze_ar,Choose Story Ending,accuracy,0.7749834546657842
xstory_cloze_ar,Generate Ending,accuracy,0.586366644606221
xstory_cloze_ar,Novel Correct Ending,accuracy,0.7518199867637326
xstory_cloze_ar,Story Continuation and Options,accuracy,0.7438782263401721
xstory_cloze_ar,median,accuracy,0.7518199867637326
xstory_cloze_es,Answer Given options,accuracy,0.7835870284579749
xstory_cloze_es,Choose Story Ending,accuracy,0.8292521508934481
xstory_cloze_es,Generate Ending,accuracy,0.6399735274652548
xstory_cloze_es,Novel Correct Ending,accuracy,0.7935142289874255
xstory_cloze_es,Story Continuation and Options,accuracy,0.7888815354070152
xstory_cloze_es,median,accuracy,0.7888815354070152
xstory_cloze_eu,Answer Given options,accuracy,0.7041694242223693
xstory_cloze_eu,Choose Story Ending,accuracy,0.6823295830575777
xstory_cloze_eu,Generate Ending,accuracy,0.5625413633355394
xstory_cloze_eu,Novel Correct Ending,accuracy,0.6671078755790867
xstory_cloze_eu,Story Continuation and Options,accuracy,0.671740569159497
xstory_cloze_eu,median,accuracy,0.671740569159497
xstory_cloze_hi,Answer Given options,accuracy,0.6915949702183984
xstory_cloze_hi,Choose Story Ending,accuracy,0.7220383851753805
xstory_cloze_hi,Generate Ending,accuracy,0.5883520847121112
xstory_cloze_hi,Novel Correct Ending,accuracy,0.6743878226340172
xstory_cloze_hi,Story Continuation and Options,accuracy,0.6816677696889477
xstory_cloze_hi,median,accuracy,0.6816677696889477
xstory_cloze_id,Answer Given options,accuracy,0.7445400397088021
xstory_cloze_id,Choose Story Ending,accuracy,0.771012574454004
xstory_cloze_id,Generate Ending,accuracy,0.6029119788219722
xstory_cloze_id,Novel Correct Ending,accuracy,0.7485109199205824
xstory_cloze_id,Story Continuation and Options,accuracy,0.7438782263401721
xstory_cloze_id,median,accuracy,0.7445400397088021
xstory_cloze_zh,Answer Given options,accuracy,0.7610853739245532
xstory_cloze_zh,Choose Story Ending,accuracy,0.7961614824619457
xstory_cloze_zh,Generate Ending,accuracy,0.6214427531436135
xstory_cloze_zh,Novel Correct Ending,accuracy,0.7696889477167439
xstory_cloze_zh,Story Continuation and Options,accuracy,0.7670416942422237
xstory_cloze_zh,median,accuracy,0.7670416942422237
xwinograd_en,Replace,accuracy,0.5225806451612903
xwinograd_en,True or False,accuracy,0.48946236559139783
xwinograd_en,does underscore refer to,accuracy,0.5281720430107527
xwinograd_en,stand for,accuracy,0.5062365591397849
xwinograd_en,underscore refer to,accuracy,0.5372043010752688
xwinograd_en,median,accuracy,0.5225806451612903
xwinograd_fr,Replace,accuracy,0.5060240963855421
xwinograd_fr,True or False,accuracy,0.5421686746987951
xwinograd_fr,does underscore refer to,accuracy,0.5542168674698795
xwinograd_fr,stand for,accuracy,0.4819277108433735
xwinograd_fr,underscore refer to,accuracy,0.5301204819277109
xwinograd_fr,median,accuracy,0.5301204819277109
xwinograd_pt,Replace,accuracy,0.5133079847908745
xwinograd_pt,True or False,accuracy,0.4714828897338403
xwinograd_pt,does underscore refer to,accuracy,0.5209125475285171
xwinograd_pt,stand for,accuracy,0.5019011406844106
xwinograd_pt,underscore refer to,accuracy,0.5399239543726235
xwinograd_pt,median,accuracy,0.5133079847908745
xwinograd_zh,Replace,accuracy,0.5257936507936508
xwinograd_zh,True or False,accuracy,0.5297619047619048
xwinograd_zh,does underscore refer to,accuracy,0.5218253968253969
xwinograd_zh,stand for,accuracy,0.4444444444444444
xwinograd_zh,underscore refer to,accuracy,0.5198412698412699
xwinograd_zh,median,accuracy,0.5218253968253969
multiple,average,multiple,0.5631550819200618
1 dataset prompt metric value
2 anli_dev_r1 GPT-3 style accuracy 0.351
3 anli_dev_r1 MNLI crowdsource accuracy 0.334
4 anli_dev_r1 can we infer accuracy 0.351
5 anli_dev_r1 guaranteed/possible/impossible accuracy 0.288
6 anli_dev_r1 justified in saying accuracy 0.345
7 anli_dev_r1 median accuracy 0.345
8 anli_dev_r2 GPT-3 style accuracy 0.339
9 anli_dev_r2 MNLI crowdsource accuracy 0.335
10 anli_dev_r2 can we infer accuracy 0.354
11 anli_dev_r2 guaranteed/possible/impossible accuracy 0.297
12 anli_dev_r2 justified in saying accuracy 0.345
13 anli_dev_r2 median accuracy 0.339
14 anli_dev_r3 GPT-3 style accuracy 0.37583333333333335
15 anli_dev_r3 MNLI crowdsource accuracy 0.3408333333333333
16 anli_dev_r3 can we infer accuracy 0.36333333333333334
17 anli_dev_r3 guaranteed/possible/impossible accuracy 0.31083333333333335
18 anli_dev_r3 justified in saying accuracy 0.34
19 anli_dev_r3 median accuracy 0.3408333333333333
20 story_cloze_2016 Answer Given options accuracy 0.8305718866916088
21 story_cloze_2016 Choose Story Ending accuracy 0.8706574024585783
22 story_cloze_2016 Generate Ending accuracy 0.7183324425440941
23 story_cloze_2016 Novel Correct Ending accuracy 0.848743987172635
24 story_cloze_2016 Story Continuation and Options accuracy 0.8466060929983966
25 story_cloze_2016 median accuracy 0.8466060929983966
26 super_glue_cb GPT-3 style accuracy 0.625
27 super_glue_cb MNLI crowdsource accuracy 0.08928571428571429
28 super_glue_cb can we infer accuracy 0.5892857142857143
29 super_glue_cb guaranteed/possible/impossible accuracy 0.5
30 super_glue_cb justified in saying accuracy 0.5357142857142857
31 super_glue_cb median accuracy 0.5357142857142857
32 super_glue_copa C1 or C2? premise, so/because… accuracy 0.66
33 super_glue_copa best_option accuracy 0.67
34 super_glue_copa cause_effect accuracy 0.78
35 super_glue_copa i_am_hesitating accuracy 0.8
36 super_glue_copa plausible_alternatives accuracy 0.81
37 super_glue_copa median accuracy 0.78
38 super_glue_rte GPT-3 style accuracy 0.7870036101083032
39 super_glue_rte MNLI crowdsource accuracy 0.7220216606498195
40 super_glue_rte does it follow that accuracy 0.6678700361010831
41 super_glue_rte guaranteed true accuracy 0.6714801444043321
42 super_glue_rte should assume accuracy 0.6678700361010831
43 super_glue_rte median accuracy 0.6714801444043321
44 winogrande_winogrande_xl Replace accuracy 0.5406471981057617
45 winogrande_winogrande_xl True or False accuracy 0.5074980268350434
46 winogrande_winogrande_xl does underscore refer to accuracy 0.5177584846093133
47 winogrande_winogrande_xl stand for accuracy 0.510655090765588
48 winogrande_winogrande_xl underscore refer to accuracy 0.5256511444356748
49 winogrande_winogrande_xl median accuracy 0.5177584846093133
50 xcopa_id C1 or C2? premise, so/because… accuracy 0.47
51 xcopa_id best_option accuracy 0.51
52 xcopa_id cause_effect accuracy 0.65
53 xcopa_id i_am_hesitating accuracy 0.66
54 xcopa_id plausible_alternatives accuracy 0.67
55 xcopa_id median accuracy 0.65
56 xcopa_sw C1 or C2? premise, so/because… accuracy 0.58
57 xcopa_sw best_option accuracy 0.57
58 xcopa_sw cause_effect accuracy 0.46
59 xcopa_sw i_am_hesitating accuracy 0.48
60 xcopa_sw plausible_alternatives accuracy 0.45
61 xcopa_sw median accuracy 0.48
62 xcopa_ta C1 or C2? premise, so/because… accuracy 0.57
63 xcopa_ta best_option accuracy 0.67
64 xcopa_ta cause_effect accuracy 0.71
65 xcopa_ta i_am_hesitating accuracy 0.71
66 xcopa_ta plausible_alternatives accuracy 0.69
67 xcopa_ta median accuracy 0.69
68 xcopa_vi C1 or C2? premise, so/because… accuracy 0.55
69 xcopa_vi best_option accuracy 0.61
70 xcopa_vi cause_effect accuracy 0.67
71 xcopa_vi i_am_hesitating accuracy 0.66
72 xcopa_vi plausible_alternatives accuracy 0.65
73 xcopa_vi median accuracy 0.65
74 xcopa_zh C1 or C2? premise, so/because… accuracy 0.62
75 xcopa_zh best_option accuracy 0.61
76 xcopa_zh cause_effect accuracy 0.77
77 xcopa_zh i_am_hesitating accuracy 0.72
78 xcopa_zh plausible_alternatives accuracy 0.74
79 xcopa_zh median accuracy 0.72
80 xnli_ar GPT-3 style accuracy 0.5040160642570282
81 xnli_ar MNLI crowdsource accuracy 0.39879518072289155
82 xnli_ar can we infer accuracy 0.506425702811245
83 xnli_ar guaranteed/possible/impossible accuracy 0.4799196787148594
84 xnli_ar justified in saying accuracy 0.41526104417670684
85 xnli_ar median accuracy 0.4799196787148594
86 xnli_en GPT-3 style accuracy 0.5590361445783133
87 xnli_en MNLI crowdsource accuracy 0.342570281124498
88 xnli_en can we infer accuracy 0.5449799196787148
89 xnli_en guaranteed/possible/impossible accuracy 0.41164658634538154
90 xnli_en justified in saying accuracy 0.4634538152610442
91 xnli_en median accuracy 0.4634538152610442
92 xnli_es GPT-3 style accuracy 0.5373493975903615
93 xnli_es MNLI crowdsource accuracy 0.40441767068273093
94 xnli_es can we infer accuracy 0.5277108433734939
95 xnli_es guaranteed/possible/impossible accuracy 0.44216867469879517
96 xnli_es justified in saying accuracy 0.4534136546184739
97 xnli_es median accuracy 0.4534136546184739
98 xnli_fr GPT-3 style accuracy 0.5248995983935743
99 xnli_fr MNLI crowdsource accuracy 0.3895582329317269
100 xnli_fr can we infer accuracy 0.5337349397590362
101 xnli_fr guaranteed/possible/impossible accuracy 0.42971887550200805
102 xnli_fr justified in saying accuracy 0.4738955823293173
103 xnli_fr median accuracy 0.4738955823293173
104 xnli_hi GPT-3 style accuracy 0.4983935742971888
105 xnli_hi MNLI crowdsource accuracy 0.38714859437751004
106 xnli_hi can we infer accuracy 0.45542168674698796
107 xnli_hi guaranteed/possible/impossible accuracy 0.41405622489959837
108 xnli_hi justified in saying accuracy 0.38795180722891565
109 xnli_hi median accuracy 0.41405622489959837
110 xnli_sw GPT-3 style accuracy 0.43493975903614457
111 xnli_sw MNLI crowdsource accuracy 0.363855421686747
112 xnli_sw can we infer accuracy 0.42891566265060244
113 xnli_sw guaranteed/possible/impossible accuracy 0.3457831325301205
114 xnli_sw justified in saying accuracy 0.3650602409638554
115 xnli_sw median accuracy 0.3650602409638554
116 xnli_ur GPT-3 style accuracy 0.43493975903614457
117 xnli_ur MNLI crowdsource accuracy 0.3895582329317269
118 xnli_ur can we infer accuracy 0.45180722891566266
119 xnli_ur guaranteed/possible/impossible accuracy 0.40120481927710844
120 xnli_ur justified in saying accuracy 0.37630522088353413
121 xnli_ur median accuracy 0.40120481927710844
122 xnli_vi GPT-3 style accuracy 0.5196787148594377
123 xnli_vi MNLI crowdsource accuracy 0.38112449799196785
124 xnli_vi can we infer accuracy 0.5080321285140562
125 xnli_vi guaranteed/possible/impossible accuracy 0.38393574297188754
126 xnli_vi justified in saying accuracy 0.43614457831325304
127 xnli_vi median accuracy 0.43614457831325304
128 xnli_zh GPT-3 style accuracy 0.5052208835341365
129 xnli_zh MNLI crowdsource accuracy 0.4
130 xnli_zh can we infer accuracy 0.5228915662650603
131 xnli_zh guaranteed/possible/impossible accuracy 0.4738955823293173
132 xnli_zh justified in saying accuracy 0.45863453815261046
133 xnli_zh median accuracy 0.4738955823293173
134 xstory_cloze_ar Answer Given options accuracy 0.7518199867637326
135 xstory_cloze_ar Choose Story Ending accuracy 0.7749834546657842
136 xstory_cloze_ar Generate Ending accuracy 0.586366644606221
137 xstory_cloze_ar Novel Correct Ending accuracy 0.7518199867637326
138 xstory_cloze_ar Story Continuation and Options accuracy 0.7438782263401721
139 xstory_cloze_ar median accuracy 0.7518199867637326
140 xstory_cloze_es Answer Given options accuracy 0.7835870284579749
141 xstory_cloze_es Choose Story Ending accuracy 0.8292521508934481
142 xstory_cloze_es Generate Ending accuracy 0.6399735274652548
143 xstory_cloze_es Novel Correct Ending accuracy 0.7935142289874255
144 xstory_cloze_es Story Continuation and Options accuracy 0.7888815354070152
145 xstory_cloze_es median accuracy 0.7888815354070152
146 xstory_cloze_eu Answer Given options accuracy 0.7041694242223693
147 xstory_cloze_eu Choose Story Ending accuracy 0.6823295830575777
148 xstory_cloze_eu Generate Ending accuracy 0.5625413633355394
149 xstory_cloze_eu Novel Correct Ending accuracy 0.6671078755790867
150 xstory_cloze_eu Story Continuation and Options accuracy 0.671740569159497
151 xstory_cloze_eu median accuracy 0.671740569159497
152 xstory_cloze_hi Answer Given options accuracy 0.6915949702183984
153 xstory_cloze_hi Choose Story Ending accuracy 0.7220383851753805
154 xstory_cloze_hi Generate Ending accuracy 0.5883520847121112
155 xstory_cloze_hi Novel Correct Ending accuracy 0.6743878226340172
156 xstory_cloze_hi Story Continuation and Options accuracy 0.6816677696889477
157 xstory_cloze_hi median accuracy 0.6816677696889477
158 xstory_cloze_id Answer Given options accuracy 0.7445400397088021
159 xstory_cloze_id Choose Story Ending accuracy 0.771012574454004
160 xstory_cloze_id Generate Ending accuracy 0.6029119788219722
161 xstory_cloze_id Novel Correct Ending accuracy 0.7485109199205824
162 xstory_cloze_id Story Continuation and Options accuracy 0.7438782263401721
163 xstory_cloze_id median accuracy 0.7445400397088021
164 xstory_cloze_zh Answer Given options accuracy 0.7610853739245532
165 xstory_cloze_zh Choose Story Ending accuracy 0.7961614824619457
166 xstory_cloze_zh Generate Ending accuracy 0.6214427531436135
167 xstory_cloze_zh Novel Correct Ending accuracy 0.7696889477167439
168 xstory_cloze_zh Story Continuation and Options accuracy 0.7670416942422237
169 xstory_cloze_zh median accuracy 0.7670416942422237
170 xwinograd_en Replace accuracy 0.5225806451612903
171 xwinograd_en True or False accuracy 0.48946236559139783
172 xwinograd_en does underscore refer to accuracy 0.5281720430107527
173 xwinograd_en stand for accuracy 0.5062365591397849
174 xwinograd_en underscore refer to accuracy 0.5372043010752688
175 xwinograd_en median accuracy 0.5225806451612903
176 xwinograd_fr Replace accuracy 0.5060240963855421
177 xwinograd_fr True or False accuracy 0.5421686746987951
178 xwinograd_fr does underscore refer to accuracy 0.5542168674698795
179 xwinograd_fr stand for accuracy 0.4819277108433735
180 xwinograd_fr underscore refer to accuracy 0.5301204819277109
181 xwinograd_fr median accuracy 0.5301204819277109
182 xwinograd_pt Replace accuracy 0.5133079847908745
183 xwinograd_pt True or False accuracy 0.4714828897338403
184 xwinograd_pt does underscore refer to accuracy 0.5209125475285171
185 xwinograd_pt stand for accuracy 0.5019011406844106
186 xwinograd_pt underscore refer to accuracy 0.5399239543726235
187 xwinograd_pt median accuracy 0.5133079847908745
188 xwinograd_zh Replace accuracy 0.5257936507936508
189 xwinograd_zh True or False accuracy 0.5297619047619048
190 xwinograd_zh does underscore refer to accuracy 0.5218253968253969
191 xwinograd_zh stand for accuracy 0.4444444444444444
192 xwinograd_zh underscore refer to accuracy 0.5198412698412699
193 xwinograd_zh median accuracy 0.5218253968253969
194 multiple average multiple 0.5631550819200618

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Answer Given options",
"evaluation": {
"accuracy": 0.8305718866916088
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Choose Story Ending",
"evaluation": {
"accuracy": 0.8706574024585783
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Generate Ending",
"evaluation": {
"accuracy": 0.7183324425440941
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Novel Correct Ending",
"evaluation": {
"accuracy": 0.848743987172635
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "story_cloze",
"dataset_config_name": "2016",
"template_name": "Story Continuation and Options",
"evaluation": {
"accuracy": 0.8466060929983966
},
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.625
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.08928571428571429
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "can we infer",
"evaluation": {
"accuracy": 0.5892857142857143
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "guaranteed/possible/impossible",
"evaluation": {
"accuracy": 0.5
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "cb",
"template_name": "justified in saying",
"evaluation": {
"accuracy": 0.5357142857142857
},
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "C1 or C2? premise, so/because\u2026",
"evaluation": {
"accuracy": 0.66
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise, so/because\u2026', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "C1 or C2? premise, so/because\u2026",
"evaluation": {
"accuracy": 0.66
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "best_option",
"evaluation": {
"accuracy": 0.67
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "cause_effect",
"evaluation": {
"accuracy": 0.78
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "i_am_hesitating",
"evaluation": {
"accuracy": 0.8
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "copa",
"template_name": "plausible_alternatives",
"evaluation": {
"accuracy": 0.81
},
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "GPT-3 style",
"evaluation": {
"accuracy": 0.7870036101083032
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "MNLI crowdsource",
"evaluation": {
"accuracy": 0.7220216606498195
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "does it follow that",
"evaluation": {
"accuracy": 0.6678700361010831
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does it follow that', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "guaranteed true",
"evaluation": {
"accuracy": 0.6714801444043321
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed true', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "super_glue",
"dataset_config_name": "rte",
"template_name": "should assume",
"evaluation": {
"accuracy": 0.6678700361010831
},
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='should assume', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "Replace",
"evaluation": {
"accuracy": 0.5406471981057617
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "True or False",
"evaluation": {
"accuracy": 0.5074980268350434
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "does underscore refer to",
"evaluation": {
"accuracy": 0.5177584846093133
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "stand for",
"evaluation": {
"accuracy": 0.510655090765588
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "winogrande",
"dataset_config_name": "winogrande_xl",
"template_name": "underscore refer to",
"evaluation": {
"accuracy": 0.5256511444356748
},
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "C1 or C2? premise, so/because\u2026",
"evaluation": {
"accuracy": 0.47
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "best_option",
"evaluation": {
"accuracy": 0.51
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
}

View File

@@ -0,0 +1,9 @@
{
"dataset_name": "xcopa",
"dataset_config_name": "id",
"template_name": "cause_effect",
"evaluation": {
"accuracy": 0.65
},
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
}

Some files were not shown because too many files have changed in this diff Show More