初始化项目,由ModelHub XC社区提供模型
Model: bigscience/bloomz-7b1-p3 Source: Original Platform
This commit is contained in:
75
.gitattributes
vendored
Normal file
75
.gitattributes
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ggml filter=lfs diff=lfs merge=lfs -text
|
||||
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=layman_summ_es.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=palm_prompt.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=mlsum_es.templates=summarise_this_in_es_few_sentences.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:40.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-fr-en.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=gpt3-en-fr.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=version-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=xglm-en-fr-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:43.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-source+target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=a_good_translation-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=version-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=xglm-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_fr_en.templates=a_good_translation-fr-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-09T23:48:38.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-en-hi-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
evaluation_bloomz-7b1-p3/evaluation_val/wmt14_hi_en/examples.limited=3000.model=p31lossseqglobal_step1000.task=wmt14_hi_en.templates=gpt-3-hi-en-target.fewshot=0.batchsize=16.seed=1234.timestamp=2022-09-10T11:48:47.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
882
README.md
Normal file
882
README.md
Normal file
@@ -0,0 +1,882 @@
|
||||
---
|
||||
datasets:
|
||||
- Muennighoff/P3
|
||||
license: bigscience-bloom-rail-1.0
|
||||
language:
|
||||
- ak
|
||||
- ar
|
||||
- as
|
||||
- bm
|
||||
- bn
|
||||
- ca
|
||||
- code
|
||||
- en
|
||||
- es
|
||||
- eu
|
||||
- fon
|
||||
- fr
|
||||
- gu
|
||||
- hi
|
||||
- id
|
||||
- ig
|
||||
- ki
|
||||
- kn
|
||||
- lg
|
||||
- ln
|
||||
- ml
|
||||
- mr
|
||||
- ne
|
||||
- nso
|
||||
- ny
|
||||
- or
|
||||
- pa
|
||||
- pt
|
||||
- rn
|
||||
- rw
|
||||
- sn
|
||||
- st
|
||||
- sw
|
||||
- ta
|
||||
- te
|
||||
- tn
|
||||
- ts
|
||||
- tum
|
||||
- tw
|
||||
- ur
|
||||
- vi
|
||||
- wo
|
||||
- xh
|
||||
- yo
|
||||
- zh
|
||||
- zu
|
||||
programming_language:
|
||||
- C
|
||||
- C++
|
||||
- C#
|
||||
- Go
|
||||
- Java
|
||||
- JavaScript
|
||||
- Lua
|
||||
- PHP
|
||||
- Python
|
||||
- Ruby
|
||||
- Rust
|
||||
- Scala
|
||||
- TypeScript
|
||||
pipeline_tag: text-generation
|
||||
widget:
|
||||
- text: "一个传奇的开端,一个不灭的神话,这不仅仅是一部电影,而是作为一个走进新时代的标签,永远彪炳史册。Would you rate the previous review as positive, neutral or negative?"
|
||||
example_title: "zh-en sentiment"
|
||||
- text: "一个传奇的开端,一个不灭的神话,这不仅仅是一部电影,而是作为一个走进新时代的标签,永远彪炳史册。你认为这句话的立场是赞扬、中立还是批评?"
|
||||
example_title: "zh-zh sentiment"
|
||||
- text: "Suggest at least five related search terms to \"Mạng neural nhân tạo\"."
|
||||
example_title: "vi-en query"
|
||||
- text: "Proposez au moins cinq mots clés concernant «Réseau de neurones artificiels»."
|
||||
example_title: "fr-fr query"
|
||||
- text: "Explain in a sentence in Telugu what is backpropagation in neural networks."
|
||||
example_title: "te-en qa"
|
||||
- text: "Why is the sky blue?"
|
||||
example_title: "en-en qa"
|
||||
- text: "Write a fairy tale about a troll saving a princess from a dangerous dragon. The fairy tale is a masterpiece that has achieved praise worldwide and its moral is \"Heroes Come in All Shapes and Sizes\". Story (in Spanish):"
|
||||
example_title: "es-en fable"
|
||||
- text: "Write a fable about wood elves living in a forest that is suddenly invaded by ogres. The fable is a masterpiece that has achieved praise worldwide and its moral is \"Violence is the last refuge of the incompetent\". Fable (in Hindi):"
|
||||
example_title: "hi-en fable"
|
||||
model-index:
|
||||
- name: bloomz-7b1-p3
|
||||
results:
|
||||
- task:
|
||||
type: Coreference resolution
|
||||
dataset:
|
||||
type: winogrande
|
||||
name: Winogrande XL (xl)
|
||||
config: xl
|
||||
split: validation
|
||||
revision: a80f460359d1e9a67c006011c94de42a8759430c
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 54.06
|
||||
- task:
|
||||
type: Coreference resolution
|
||||
dataset:
|
||||
type: Muennighoff/xwinograd
|
||||
name: XWinograd (en)
|
||||
config: en
|
||||
split: test
|
||||
revision: 9dd5ea5505fad86b7bedad667955577815300cee
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 53.72
|
||||
- task:
|
||||
type: Coreference resolution
|
||||
dataset:
|
||||
type: Muennighoff/xwinograd
|
||||
name: XWinograd (fr)
|
||||
config: fr
|
||||
split: test
|
||||
revision: 9dd5ea5505fad86b7bedad667955577815300cee
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 55.42
|
||||
- task:
|
||||
type: Coreference resolution
|
||||
dataset:
|
||||
type: Muennighoff/xwinograd
|
||||
name: XWinograd (jp)
|
||||
config: jp
|
||||
split: test
|
||||
revision: 9dd5ea5505fad86b7bedad667955577815300cee
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 51.93
|
||||
- task:
|
||||
type: Coreference resolution
|
||||
dataset:
|
||||
type: Muennighoff/xwinograd
|
||||
name: XWinograd (pt)
|
||||
config: pt
|
||||
split: test
|
||||
revision: 9dd5ea5505fad86b7bedad667955577815300cee
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 53.99
|
||||
- task:
|
||||
type: Coreference resolution
|
||||
dataset:
|
||||
type: Muennighoff/xwinograd
|
||||
name: XWinograd (ru)
|
||||
config: ru
|
||||
split: test
|
||||
revision: 9dd5ea5505fad86b7bedad667955577815300cee
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 53.97
|
||||
- task:
|
||||
type: Coreference resolution
|
||||
dataset:
|
||||
type: Muennighoff/xwinograd
|
||||
name: XWinograd (zh)
|
||||
config: zh
|
||||
split: test
|
||||
revision: 9dd5ea5505fad86b7bedad667955577815300cee
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 52.98
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: anli
|
||||
name: ANLI (r1)
|
||||
config: r1
|
||||
split: validation
|
||||
revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 35.1
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: anli
|
||||
name: ANLI (r2)
|
||||
config: r2
|
||||
split: validation
|
||||
revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 35.4
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: anli
|
||||
name: ANLI (r3)
|
||||
config: r3
|
||||
split: validation
|
||||
revision: 9dbd830a06fea8b1c49d6e5ef2004a08d9f45094
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 37.58
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: super_glue
|
||||
name: SuperGLUE (cb)
|
||||
config: cb
|
||||
split: validation
|
||||
revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 62.5
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: super_glue
|
||||
name: SuperGLUE (rte)
|
||||
config: rte
|
||||
split: validation
|
||||
revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 78.7
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (ar)
|
||||
config: ar
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 50.64
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (bg)
|
||||
config: bg
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 43.98
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (de)
|
||||
config: de
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 47.03
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (el)
|
||||
config: el
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 41.89
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (en)
|
||||
config: en
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 55.9
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (es)
|
||||
config: es
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 53.73
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (fr)
|
||||
config: fr
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 53.37
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (hi)
|
||||
config: hi
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 49.84
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (ru)
|
||||
config: ru
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 46.55
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (sw)
|
||||
config: sw
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 43.49
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (th)
|
||||
config: th
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 43.17
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (tr)
|
||||
config: tr
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 40.44
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (ur)
|
||||
config: ur
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 45.18
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (vi)
|
||||
config: vi
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 51.97
|
||||
- task:
|
||||
type: Natural language inference
|
||||
dataset:
|
||||
type: xnli
|
||||
name: XNLI (zh)
|
||||
config: zh
|
||||
split: validation
|
||||
revision: a5a45e4ff92d5d3f34de70aaf4b72c3bdf9f7f16
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 52.29
|
||||
- task:
|
||||
type: Program synthesis
|
||||
dataset:
|
||||
type: openai_humaneval
|
||||
name: HumanEval
|
||||
config: None
|
||||
split: test
|
||||
revision: e8dc562f5de170c54b5481011dd9f4fa04845771
|
||||
metrics:
|
||||
- type: Pass@1
|
||||
value: 1.55
|
||||
- type: Pass@10
|
||||
value: 4.12
|
||||
- type: Pass@100
|
||||
value: 9.60
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: story_cloze
|
||||
name: StoryCloze (2016)
|
||||
config: "2016"
|
||||
split: validation
|
||||
revision: e724c6f8cdf7c7a2fb229d862226e15b023ee4db
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 87.07
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: super_glue
|
||||
name: SuperGLUE (copa)
|
||||
config: copa
|
||||
split: validation
|
||||
revision: 9e12063561e7e6c79099feb6d5a493142584e9e2
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 81.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (et)
|
||||
config: et
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 57.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (ht)
|
||||
config: ht
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 56.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (id)
|
||||
config: id
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 70.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (it)
|
||||
config: it
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 60.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (qu)
|
||||
config: qu
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 54.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (sw)
|
||||
config: sw
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 62.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (ta)
|
||||
config: ta
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 71.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (th)
|
||||
config: th
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 63.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (tr)
|
||||
config: tr
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 58.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (vi)
|
||||
config: vi
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 67.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: xcopa
|
||||
name: XCOPA (zh)
|
||||
config: zh
|
||||
split: validation
|
||||
revision: 37f73c60fb123111fa5af5f9b705d0b3747fd187
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 79.0
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (ar)
|
||||
config: ar
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 78.69
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (es)
|
||||
config: es
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 82.93
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (eu)
|
||||
config: eu
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 70.42
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (hi)
|
||||
config: hi
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 72.2
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (id)
|
||||
config: id
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 77.1
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (my)
|
||||
config: my
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 51.49
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (ru)
|
||||
config: ru
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 66.45
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (sw)
|
||||
config: sw
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 60.82
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (te)
|
||||
config: te
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 63.14
|
||||
- task:
|
||||
type: Sentence completion
|
||||
dataset:
|
||||
type: Muennighoff/xstory_cloze
|
||||
name: XStoryCloze (zh)
|
||||
config: zh
|
||||
split: validation
|
||||
revision: 8bb76e594b68147f1a430e86829d07189622b90d
|
||||
metrics:
|
||||
- type: Accuracy
|
||||
value: 80.34
|
||||
---
|
||||
|
||||

|
||||
|
||||
# Table of Contents
|
||||
|
||||
1. [Model Summary](#model-summary)
|
||||
2. [Use](#use)
|
||||
3. [Limitations](#limitations)
|
||||
4. [Training](#training)
|
||||
5. [Evaluation](#evaluation)
|
||||
6. [Citation](#citation)
|
||||
|
||||
# Model Summary
|
||||
|
||||
> We present BLOOMZ & mT0, a family of models capable of following human instructions in dozens of languages zero-shot. We finetune BLOOM & mT5 pretrained multilingual language models on our crosslingual task mixture (xP3) and find the resulting models capable of crosslingual generalization to unseen tasks & languages.
|
||||
|
||||
- **Repository:** [bigscience-workshop/xmtf](https://github.com/bigscience-workshop/xmtf)
|
||||
- **Paper:** [Crosslingual Generalization through Multitask Finetuning](https://arxiv.org/abs/2211.01786)
|
||||
- **Point of Contact:** [Niklas Muennighoff](mailto:niklas@hf.co)
|
||||
- **Languages:** Refer to [bloom](https://huggingface.co/bigscience/bloom) for pretraining & [xP3](https://huggingface.co/datasets/bigscience/xP3) for finetuning language proportions. It understands both pretraining & finetuning languages.
|
||||
- **BLOOMZ & mT0 Model Family:**
|
||||
|
||||
<div class="max-w-full overflow-auto">
|
||||
<table>
|
||||
<tr>
|
||||
<th colspan="12">Multitask finetuned on <a style="font-weight:bold" href=https://huggingface.co/datasets/bigscience/xP3>xP3</a>. Recommended for prompting in English.</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Parameters</td>
|
||||
<td>300M</td>
|
||||
<td>580M</td>
|
||||
<td>1.2B</td>
|
||||
<td>3.7B</td>
|
||||
<td>13B</td>
|
||||
<td>560M</td>
|
||||
<td>1.1B</td>
|
||||
<td>1.7B</td>
|
||||
<td>3B</td>
|
||||
<td>7.1B</td>
|
||||
<td>176B</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Finetuned Model</td>
|
||||
<td><a href=https://huggingface.co/bigscience/mt0-small>mt0-small</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/mt0-base>mt0-base</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/mt0-large>mt0-large</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/mt0-xl>mt0-xl</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/mt0-xxl>mt0-xxl</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-560m>bloomz-560m</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-1b1>bloomz-1b1</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-1b7>bloomz-1b7</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-3b>bloomz-3b</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-7b1>bloomz-7b1</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz>bloomz</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th colspan="12">Multitask finetuned on <a style="font-weight:bold" href=https://huggingface.co/datasets/bigscience/xP3mt>xP3mt</a>. Recommended for prompting in non-English.</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Finetuned Model</td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td><a href=https://huggingface.co/bigscience/mt0-xxl-mt>mt0-xxl-mt</a></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-7b1-mt>bloomz-7b1-mt</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-mt>bloomz-mt</a></td>
|
||||
</tr>
|
||||
<tr>
<th colspan="12">Multitask finetuned on <a style="font-weight:bold" href=https://huggingface.co/datasets/Muennighoff/P3>P3</a>. Released for research purposes only. Strictly inferior to above models!</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Finetuned Model</td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td><a href=https://huggingface.co/bigscience/mt0-xxl-p3>mt0-xxl-p3</a></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-7b1-p3>bloomz-7b1-p3</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloomz-p3>bloomz-p3</a></td>
|
||||
</tr>
|
||||
<tr>
<th colspan="12">Original pretrained checkpoints. Not recommended.</th>
</tr>
|
||||
<tr>
|
||||
<td>Pretrained Model</td>
|
||||
<td><a href=https://huggingface.co/google/mt5-small>mt5-small</a></td>
|
||||
<td><a href=https://huggingface.co/google/mt5-base>mt5-base</a></td>
|
||||
<td><a href=https://huggingface.co/google/mt5-large>mt5-large</a></td>
|
||||
<td><a href=https://huggingface.co/google/mt5-xl>mt5-xl</a></td>
|
||||
<td><a href=https://huggingface.co/google/mt5-xxl>mt5-xxl</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloom-560m>bloom-560m</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloom-1b1>bloom-1b1</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloom-1b7>bloom-1b7</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloom-3b>bloom-3b</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloom-7b1>bloom-7b1</a></td>
|
||||
<td><a href=https://huggingface.co/bigscience/bloom>bloom</a></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
||||
# Use
|
||||
|
||||
## Intended use
|
||||
|
||||
We recommend using the model to perform tasks expressed in natural language. For example, given the prompt "*Translate to English: Je t’aime.*", the model will most likely answer "*I love you.*". Some prompt ideas from our paper:
|
||||
- 一个传奇的开端,一个不灭的神话,这不仅仅是一部电影,而是作为一个走进新时代的标签,永远彪炳史册。你认为这句话的立场是赞扬、中立还是批评?
|
||||
- Suggest at least five related search terms to "Mạng neural nhân tạo".
|
||||
- Write a fairy tale about a troll saving a princess from a dangerous dragon. The fairy tale is a masterpiece that has achieved praise worldwide and its moral is "Heroes Come in All Shapes and Sizes". Story (in Spanish):
|
||||
- Explain in a sentence in Telugu what is backpropagation in neural networks.
|
||||
|
||||
**Feel free to share your generations in the Community tab!**
|
||||
|
||||
## How to use
|
||||
|
||||
### CPU
|
||||
|
||||
<details>
|
||||
<summary> Click to expand </summary>
|
||||
|
||||
```python
|
||||
# pip install -q transformers
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
checkpoint = "bigscience/bloomz-7b1-p3"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
||||
model = AutoModelForCausalLM.from_pretrained(checkpoint)
|
||||
|
||||
inputs = tokenizer.encode("Translate to English: Je t’aime.", return_tensors="pt")
|
||||
outputs = model.generate(inputs)
|
||||
print(tokenizer.decode(outputs[0]))
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### GPU
|
||||
|
||||
<details>
|
||||
<summary> Click to expand </summary>
|
||||
|
||||
```python
|
||||
# pip install -q transformers accelerate
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
checkpoint = "bigscience/bloomz-7b1-p3"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
||||
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype="auto", device_map="auto")
|
||||
|
||||
inputs = tokenizer.encode("Translate to English: Je t’aime.", return_tensors="pt").to("cuda")
|
||||
outputs = model.generate(inputs)
|
||||
print(tokenizer.decode(outputs[0]))
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### GPU in 8bit
|
||||
|
||||
<details>
|
||||
<summary> Click to expand </summary>
|
||||
|
||||
```python
|
||||
# pip install -q transformers accelerate bitsandbytes
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
checkpoint = "bigscience/bloomz-7b1-p3"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
|
||||
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto", load_in_8bit=True)
|
||||
|
||||
inputs = tokenizer.encode("Translate to English: Je t’aime.", return_tensors="pt").to("cuda")
|
||||
outputs = model.generate(inputs)
|
||||
print(tokenizer.decode(outputs[0]))
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
<!-- Necessary for whitespace -->
|
||||
###
|
||||
|
||||
# Limitations
|
||||
|
||||
**Prompt Engineering:** The performance may vary depending on the prompt. For BLOOMZ models, we recommend making it very clear when the input stops to avoid the model trying to continue it. For example, the prompt "*Translate to English: Je t'aime*" without the full stop (.) at the end, may result in the model trying to continue the French sentence. Better prompts are e.g. "*Translate to English: Je t'aime.*", "*Translate to English: Je t'aime. Translation:*", "*What is "Je t'aime." in English?*", where it is clear for the model when it should answer. Further, we recommend providing the model as much context as possible. For example, if you want it to answer in Telugu, then tell the model, e.g. "*Explain in a sentence in Telugu what is backpropagation in neural networks.*".
|
||||
|
||||
# Training
|
||||
|
||||
## Model
|
||||
|
||||
- **Architecture:** Same as [bloom-7b1](https://huggingface.co/bigscience/bloom-7b1), also refer to the `config.json` file
|
||||
- **Finetuning steps:** 1000
|
||||
- **Finetuning tokens:** 4.19 billion
|
||||
- **Finetuning layout:** 1x pipeline parallel, 1x tensor parallel, 64x data parallel
|
||||
- **Precision:** float16
|
||||
|
||||
## Hardware
|
||||
|
||||
- **CPUs:** AMD CPUs with 512GB memory per node
|
||||
- **GPUs:** 64 A100 80GB GPUs with 8 GPUs per node (8 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links
|
||||
- **Communication:** NCCL-communications network with a fully dedicated subnet
|
||||
|
||||
## Software
|
||||
|
||||
- **Orchestration:** [Megatron-DeepSpeed](https://github.com/bigscience-workshop/Megatron-DeepSpeed)
|
||||
- **Optimizer & parallelism:** [DeepSpeed](https://github.com/microsoft/DeepSpeed)
|
||||
- **Neural networks:** [PyTorch](https://github.com/pytorch/pytorch) (pytorch-1.11 w/ CUDA-11.5)
|
||||
- **FP16 if applicable:** [apex](https://github.com/NVIDIA/apex)
|
||||
|
||||
# Evaluation
|
||||
|
||||
We refer to Table 7 from our [paper](https://arxiv.org/abs/2211.01786) & [bigscience/evaluation-results](https://huggingface.co/datasets/bigscience/evaluation-results) for zero-shot results on unseen tasks. The sidebar reports zero-shot performance of the best prompt per dataset config.
|
||||
|
||||
# Citation
|
||||
```bibtex
|
||||
@misc{muennighoff2022crosslingual,
|
||||
title={Crosslingual Generalization through Multitask Finetuning},
|
||||
author={Niklas Muennighoff and Thomas Wang and Lintang Sutawika and Adam Roberts and Stella Biderman and Teven Le Scao and M Saiful Bari and Sheng Shen and Zheng-Xin Yong and Hailey Schoelkopf and Xiangru Tang and Dragomir Radev and Alham Fikri Aji and Khalid Almubarak and Samuel Albanie and Zaid Alyafeai and Albert Webson and Edward Raff and Colin Raffel},
|
||||
year={2022},
|
||||
eprint={2211.01786},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL}
|
||||
}
|
||||
```
|
||||
31
config.json
Normal file
31
config.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"apply_residual_connection_post_layernorm": false,
|
||||
"architectures": [
|
||||
"BloomForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"attention_softmax_in_fp32": true,
|
||||
"bias_dropout_fusion": true,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"hidden_dropout": 0.0,
|
||||
"initializer_range": 0.02,
|
||||
"layer_norm_epsilon": 1e-05,
|
||||
"masked_softmax_fusion": true,
|
||||
"model_type": "bloom",
|
||||
"n_embed": 4096,
|
||||
"n_inner": null,
|
||||
"n_layer": 30,
|
||||
"num_attention_heads": 32,
|
||||
"offset_alibi": 100,
|
||||
"pad_token_id": 3,
|
||||
"pretraining_tp": 4,
|
||||
"seq_length": 2048,
|
||||
"skip_bias_add": true,
|
||||
"skip_bias_add_qkv": false,
|
||||
"slow_but_exact": false,
|
||||
"transformers_version": "4.21.0.dev0",
|
||||
"unk_token_id": 0,
|
||||
"use_cache": true,
|
||||
"vocab_size": 250880
|
||||
}
|
||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
||||
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "ar",
|
||||
"template_name": "Answer Given options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7518199867637326
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "ar",
|
||||
"template_name": "Choose Story Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7749834546657842
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "ar",
|
||||
"template_name": "Generate Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.586366644606221
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "ar",
|
||||
"template_name": "Novel Correct Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7518199867637326
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "ar",
|
||||
"template_name": "Story Continuation and Options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7438782263401721
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "Answer Given options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7835870284579749
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "Choose Story Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.8292521508934481
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "Generate Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6399735274652548
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "Novel Correct Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7935142289874255
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "es",
|
||||
"template_name": "Story Continuation and Options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7888815354070152
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "eu",
|
||||
"template_name": "Answer Given options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7041694242223693
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "eu",
|
||||
"template_name": "Choose Story Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6823295830575777
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "eu",
|
||||
"template_name": "Generate Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5625413633355394
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "eu",
|
||||
"template_name": "Novel Correct Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6671078755790867
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "eu",
|
||||
"template_name": "Story Continuation and Options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.671740569159497
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='eu', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "hi",
|
||||
"template_name": "Answer Given options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6915949702183984
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "hi",
|
||||
"template_name": "Choose Story Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7220383851753805
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "hi",
|
||||
"template_name": "Generate Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5883520847121112
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "hi",
|
||||
"template_name": "Novel Correct Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6743878226340172
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "hi",
|
||||
"template_name": "Story Continuation and Options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6816677696889477
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "Answer Given options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7445400397088021
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "Choose Story Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.771012574454004
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "Generate Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6029119788219722
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "Novel Correct Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7485109199205824
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "Story Continuation and Options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7438782263401721
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "Answer Given options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7610853739245532
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "Choose Story Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7961614824619457
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "Generate Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6214427531436135
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "Novel Correct Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7696889477167439
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xstory_cloze",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "Story Continuation and Options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7670416942422237
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "Replace",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5225806451612903
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "True or False",
|
||||
"evaluation": {
|
||||
"accuracy": 0.48946236559139783
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "does underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5281720430107527
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "stand for",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5062365591397849
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "en",
|
||||
"template_name": "underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5372043010752688
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "Replace",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5060240963855421
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "True or False",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5421686746987951
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "does underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5542168674698795
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "stand for",
|
||||
"evaluation": {
|
||||
"accuracy": 0.4819277108433735
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "fr",
|
||||
"template_name": "underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5301204819277109
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "pt",
|
||||
"template_name": "Replace",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5133079847908745
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "pt",
|
||||
"template_name": "True or False",
|
||||
"evaluation": {
|
||||
"accuracy": 0.4714828897338403
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "pt",
|
||||
"template_name": "does underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5209125475285171
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "pt",
|
||||
"template_name": "stand for",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5019011406844106
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "pt",
|
||||
"template_name": "underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5399239543726235
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "Replace",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5257936507936508
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "True or False",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5297619047619048
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "does underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5218253968253969
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "stand for",
|
||||
"evaluation": {
|
||||
"accuracy": 0.4444444444444444
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "Muennighoff/xwinograd",
|
||||
"dataset_config_name": "zh",
|
||||
"template_name": "underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5198412698412699
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r1",
|
||||
"template_name": "GPT-3 style",
|
||||
"evaluation": {
|
||||
"accuracy": 0.351
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r1",
|
||||
"template_name": "MNLI crowdsource",
|
||||
"evaluation": {
|
||||
"accuracy": 0.334
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r1",
|
||||
"template_name": "can we infer",
|
||||
"evaluation": {
|
||||
"accuracy": 0.351
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r1",
|
||||
"template_name": "guaranteed/possible/impossible",
|
||||
"evaluation": {
|
||||
"accuracy": 0.288
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r1",
|
||||
"template_name": "justified in saying",
|
||||
"evaluation": {
|
||||
"accuracy": 0.345
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r1', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r1', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r2",
|
||||
"template_name": "GPT-3 style",
|
||||
"evaluation": {
|
||||
"accuracy": 0.339
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r2",
|
||||
"template_name": "MNLI crowdsource",
|
||||
"evaluation": {
|
||||
"accuracy": 0.335
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r2",
|
||||
"template_name": "can we infer",
|
||||
"evaluation": {
|
||||
"accuracy": 0.354
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r2",
|
||||
"template_name": "guaranteed/possible/impossible",
|
||||
"evaluation": {
|
||||
"accuracy": 0.297
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r2",
|
||||
"template_name": "justified in saying",
|
||||
"evaluation": {
|
||||
"accuracy": 0.345
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r2', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r2', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r3",
|
||||
"template_name": "GPT-3 style",
|
||||
"evaluation": {
|
||||
"accuracy": 0.37583333333333335
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r3",
|
||||
"template_name": "MNLI crowdsource",
|
||||
"evaluation": {
|
||||
"accuracy": 0.3408333333333333
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r3",
|
||||
"template_name": "can we infer",
|
||||
"evaluation": {
|
||||
"accuracy": 0.36333333333333334
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r3",
|
||||
"template_name": "guaranteed/possible/impossible",
|
||||
"evaluation": {
|
||||
"accuracy": 0.31083333333333335
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "anli",
|
||||
"dataset_config_name": "dev_r3",
|
||||
"template_name": "justified in saying",
|
||||
"evaluation": {
|
||||
"accuracy": 0.34
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='dev_r3', dataset_name='anli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='dev_r3', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
194
evaluation_bloomz-7b1-p3/evaluation_l1/merged.csv
Normal file
194
evaluation_bloomz-7b1-p3/evaluation_l1/merged.csv
Normal file
@@ -0,0 +1,194 @@
|
||||
dataset,prompt,metric,value
|
||||
anli_dev_r1,GPT-3 style,accuracy,0.351
|
||||
anli_dev_r1,MNLI crowdsource,accuracy,0.334
|
||||
anli_dev_r1,can we infer,accuracy,0.351
|
||||
anli_dev_r1,guaranteed/possible/impossible,accuracy,0.288
|
||||
anli_dev_r1,justified in saying,accuracy,0.345
|
||||
anli_dev_r1,median,accuracy,0.345
|
||||
anli_dev_r2,GPT-3 style,accuracy,0.339
|
||||
anli_dev_r2,MNLI crowdsource,accuracy,0.335
|
||||
anli_dev_r2,can we infer,accuracy,0.354
|
||||
anli_dev_r2,guaranteed/possible/impossible,accuracy,0.297
|
||||
anli_dev_r2,justified in saying,accuracy,0.345
|
||||
anli_dev_r2,median,accuracy,0.339
|
||||
anli_dev_r3,GPT-3 style,accuracy,0.37583333333333335
|
||||
anli_dev_r3,MNLI crowdsource,accuracy,0.3408333333333333
|
||||
anli_dev_r3,can we infer,accuracy,0.36333333333333334
|
||||
anli_dev_r3,guaranteed/possible/impossible,accuracy,0.31083333333333335
|
||||
anli_dev_r3,justified in saying,accuracy,0.34
|
||||
anli_dev_r3,median,accuracy,0.3408333333333333
|
||||
story_cloze_2016,Answer Given options,accuracy,0.8305718866916088
|
||||
story_cloze_2016,Choose Story Ending,accuracy,0.8706574024585783
|
||||
story_cloze_2016,Generate Ending,accuracy,0.7183324425440941
|
||||
story_cloze_2016,Novel Correct Ending,accuracy,0.848743987172635
|
||||
story_cloze_2016,Story Continuation and Options,accuracy,0.8466060929983966
|
||||
story_cloze_2016,median,accuracy,0.8466060929983966
|
||||
super_glue_cb,GPT-3 style,accuracy,0.625
|
||||
super_glue_cb,MNLI crowdsource,accuracy,0.08928571428571429
|
||||
super_glue_cb,can we infer,accuracy,0.5892857142857143
|
||||
super_glue_cb,guaranteed/possible/impossible,accuracy,0.5
|
||||
super_glue_cb,justified in saying,accuracy,0.5357142857142857
|
||||
super_glue_cb,median,accuracy,0.5357142857142857
|
||||
super_glue_copa,"C1 or C2? premise, so/because…",accuracy,0.66
|
||||
super_glue_copa,best_option,accuracy,0.67
|
||||
super_glue_copa,cause_effect,accuracy,0.78
|
||||
super_glue_copa,i_am_hesitating,accuracy,0.8
|
||||
super_glue_copa,plausible_alternatives,accuracy,0.81
|
||||
super_glue_copa,median,accuracy,0.78
|
||||
super_glue_rte,GPT-3 style,accuracy,0.7870036101083032
|
||||
super_glue_rte,MNLI crowdsource,accuracy,0.7220216606498195
|
||||
super_glue_rte,does it follow that,accuracy,0.6678700361010831
|
||||
super_glue_rte,guaranteed true,accuracy,0.6714801444043321
|
||||
super_glue_rte,should assume,accuracy,0.6678700361010831
|
||||
super_glue_rte,median,accuracy,0.6714801444043321
|
||||
winogrande_winogrande_xl,Replace,accuracy,0.5406471981057617
|
||||
winogrande_winogrande_xl,True or False,accuracy,0.5074980268350434
|
||||
winogrande_winogrande_xl,does underscore refer to,accuracy,0.5177584846093133
|
||||
winogrande_winogrande_xl,stand for,accuracy,0.510655090765588
|
||||
winogrande_winogrande_xl,underscore refer to,accuracy,0.5256511444356748
|
||||
winogrande_winogrande_xl,median,accuracy,0.5177584846093133
|
||||
xcopa_id,"C1 or C2? premise, so/because…",accuracy,0.47
|
||||
xcopa_id,best_option,accuracy,0.51
|
||||
xcopa_id,cause_effect,accuracy,0.65
|
||||
xcopa_id,i_am_hesitating,accuracy,0.66
|
||||
xcopa_id,plausible_alternatives,accuracy,0.67
|
||||
xcopa_id,median,accuracy,0.65
|
||||
xcopa_sw,"C1 or C2? premise, so/because…",accuracy,0.58
|
||||
xcopa_sw,best_option,accuracy,0.57
|
||||
xcopa_sw,cause_effect,accuracy,0.46
|
||||
xcopa_sw,i_am_hesitating,accuracy,0.48
|
||||
xcopa_sw,plausible_alternatives,accuracy,0.45
|
||||
xcopa_sw,median,accuracy,0.48
|
||||
xcopa_ta,"C1 or C2? premise, so/because…",accuracy,0.57
|
||||
xcopa_ta,best_option,accuracy,0.67
|
||||
xcopa_ta,cause_effect,accuracy,0.71
|
||||
xcopa_ta,i_am_hesitating,accuracy,0.71
|
||||
xcopa_ta,plausible_alternatives,accuracy,0.69
|
||||
xcopa_ta,median,accuracy,0.69
|
||||
xcopa_vi,"C1 or C2? premise, so/because…",accuracy,0.55
|
||||
xcopa_vi,best_option,accuracy,0.61
|
||||
xcopa_vi,cause_effect,accuracy,0.67
|
||||
xcopa_vi,i_am_hesitating,accuracy,0.66
|
||||
xcopa_vi,plausible_alternatives,accuracy,0.65
|
||||
xcopa_vi,median,accuracy,0.65
|
||||
xcopa_zh,"C1 or C2? premise, so/because…",accuracy,0.62
|
||||
xcopa_zh,best_option,accuracy,0.61
|
||||
xcopa_zh,cause_effect,accuracy,0.77
|
||||
xcopa_zh,i_am_hesitating,accuracy,0.72
|
||||
xcopa_zh,plausible_alternatives,accuracy,0.74
|
||||
xcopa_zh,median,accuracy,0.72
|
||||
xnli_ar,GPT-3 style,accuracy,0.5040160642570282
|
||||
xnli_ar,MNLI crowdsource,accuracy,0.39879518072289155
|
||||
xnli_ar,can we infer,accuracy,0.506425702811245
|
||||
xnli_ar,guaranteed/possible/impossible,accuracy,0.4799196787148594
|
||||
xnli_ar,justified in saying,accuracy,0.41526104417670684
|
||||
xnli_ar,median,accuracy,0.4799196787148594
|
||||
xnli_en,GPT-3 style,accuracy,0.5590361445783133
|
||||
xnli_en,MNLI crowdsource,accuracy,0.342570281124498
|
||||
xnli_en,can we infer,accuracy,0.5449799196787148
|
||||
xnli_en,guaranteed/possible/impossible,accuracy,0.41164658634538154
|
||||
xnli_en,justified in saying,accuracy,0.4634538152610442
|
||||
xnli_en,median,accuracy,0.4634538152610442
|
||||
xnli_es,GPT-3 style,accuracy,0.5373493975903615
|
||||
xnli_es,MNLI crowdsource,accuracy,0.40441767068273093
|
||||
xnli_es,can we infer,accuracy,0.5277108433734939
|
||||
xnli_es,guaranteed/possible/impossible,accuracy,0.44216867469879517
|
||||
xnli_es,justified in saying,accuracy,0.4534136546184739
|
||||
xnli_es,median,accuracy,0.4534136546184739
|
||||
xnli_fr,GPT-3 style,accuracy,0.5248995983935743
|
||||
xnli_fr,MNLI crowdsource,accuracy,0.3895582329317269
|
||||
xnli_fr,can we infer,accuracy,0.5337349397590362
|
||||
xnli_fr,guaranteed/possible/impossible,accuracy,0.42971887550200805
|
||||
xnli_fr,justified in saying,accuracy,0.4738955823293173
|
||||
xnli_fr,median,accuracy,0.4738955823293173
|
||||
xnli_hi,GPT-3 style,accuracy,0.4983935742971888
|
||||
xnli_hi,MNLI crowdsource,accuracy,0.38714859437751004
|
||||
xnli_hi,can we infer,accuracy,0.45542168674698796
|
||||
xnli_hi,guaranteed/possible/impossible,accuracy,0.41405622489959837
|
||||
xnli_hi,justified in saying,accuracy,0.38795180722891565
|
||||
xnli_hi,median,accuracy,0.41405622489959837
|
||||
xnli_sw,GPT-3 style,accuracy,0.43493975903614457
|
||||
xnli_sw,MNLI crowdsource,accuracy,0.363855421686747
|
||||
xnli_sw,can we infer,accuracy,0.42891566265060244
|
||||
xnli_sw,guaranteed/possible/impossible,accuracy,0.3457831325301205
|
||||
xnli_sw,justified in saying,accuracy,0.3650602409638554
|
||||
xnli_sw,median,accuracy,0.3650602409638554
|
||||
xnli_ur,GPT-3 style,accuracy,0.43493975903614457
|
||||
xnli_ur,MNLI crowdsource,accuracy,0.3895582329317269
|
||||
xnli_ur,can we infer,accuracy,0.45180722891566266
|
||||
xnli_ur,guaranteed/possible/impossible,accuracy,0.40120481927710844
|
||||
xnli_ur,justified in saying,accuracy,0.37630522088353413
|
||||
xnli_ur,median,accuracy,0.40120481927710844
|
||||
xnli_vi,GPT-3 style,accuracy,0.5196787148594377
|
||||
xnli_vi,MNLI crowdsource,accuracy,0.38112449799196785
|
||||
xnli_vi,can we infer,accuracy,0.5080321285140562
|
||||
xnli_vi,guaranteed/possible/impossible,accuracy,0.38393574297188754
|
||||
xnli_vi,justified in saying,accuracy,0.43614457831325304
|
||||
xnli_vi,median,accuracy,0.43614457831325304
|
||||
xnli_zh,GPT-3 style,accuracy,0.5052208835341365
|
||||
xnli_zh,MNLI crowdsource,accuracy,0.4
|
||||
xnli_zh,can we infer,accuracy,0.5228915662650603
|
||||
xnli_zh,guaranteed/possible/impossible,accuracy,0.4738955823293173
|
||||
xnli_zh,justified in saying,accuracy,0.45863453815261046
|
||||
xnli_zh,median,accuracy,0.4738955823293173
|
||||
xstory_cloze_ar,Answer Given options,accuracy,0.7518199867637326
|
||||
xstory_cloze_ar,Choose Story Ending,accuracy,0.7749834546657842
|
||||
xstory_cloze_ar,Generate Ending,accuracy,0.586366644606221
|
||||
xstory_cloze_ar,Novel Correct Ending,accuracy,0.7518199867637326
|
||||
xstory_cloze_ar,Story Continuation and Options,accuracy,0.7438782263401721
|
||||
xstory_cloze_ar,median,accuracy,0.7518199867637326
|
||||
xstory_cloze_es,Answer Given options,accuracy,0.7835870284579749
|
||||
xstory_cloze_es,Choose Story Ending,accuracy,0.8292521508934481
|
||||
xstory_cloze_es,Generate Ending,accuracy,0.6399735274652548
|
||||
xstory_cloze_es,Novel Correct Ending,accuracy,0.7935142289874255
|
||||
xstory_cloze_es,Story Continuation and Options,accuracy,0.7888815354070152
|
||||
xstory_cloze_es,median,accuracy,0.7888815354070152
|
||||
xstory_cloze_eu,Answer Given options,accuracy,0.7041694242223693
|
||||
xstory_cloze_eu,Choose Story Ending,accuracy,0.6823295830575777
|
||||
xstory_cloze_eu,Generate Ending,accuracy,0.5625413633355394
|
||||
xstory_cloze_eu,Novel Correct Ending,accuracy,0.6671078755790867
|
||||
xstory_cloze_eu,Story Continuation and Options,accuracy,0.671740569159497
|
||||
xstory_cloze_eu,median,accuracy,0.671740569159497
|
||||
xstory_cloze_hi,Answer Given options,accuracy,0.6915949702183984
|
||||
xstory_cloze_hi,Choose Story Ending,accuracy,0.7220383851753805
|
||||
xstory_cloze_hi,Generate Ending,accuracy,0.5883520847121112
|
||||
xstory_cloze_hi,Novel Correct Ending,accuracy,0.6743878226340172
|
||||
xstory_cloze_hi,Story Continuation and Options,accuracy,0.6816677696889477
|
||||
xstory_cloze_hi,median,accuracy,0.6816677696889477
|
||||
xstory_cloze_id,Answer Given options,accuracy,0.7445400397088021
|
||||
xstory_cloze_id,Choose Story Ending,accuracy,0.771012574454004
|
||||
xstory_cloze_id,Generate Ending,accuracy,0.6029119788219722
|
||||
xstory_cloze_id,Novel Correct Ending,accuracy,0.7485109199205824
|
||||
xstory_cloze_id,Story Continuation and Options,accuracy,0.7438782263401721
|
||||
xstory_cloze_id,median,accuracy,0.7445400397088021
|
||||
xstory_cloze_zh,Answer Given options,accuracy,0.7610853739245532
|
||||
xstory_cloze_zh,Choose Story Ending,accuracy,0.7961614824619457
|
||||
xstory_cloze_zh,Generate Ending,accuracy,0.6214427531436135
|
||||
xstory_cloze_zh,Novel Correct Ending,accuracy,0.7696889477167439
|
||||
xstory_cloze_zh,Story Continuation and Options,accuracy,0.7670416942422237
|
||||
xstory_cloze_zh,median,accuracy,0.7670416942422237
|
||||
xwinograd_en,Replace,accuracy,0.5225806451612903
|
||||
xwinograd_en,True or False,accuracy,0.48946236559139783
|
||||
xwinograd_en,does underscore refer to,accuracy,0.5281720430107527
|
||||
xwinograd_en,stand for,accuracy,0.5062365591397849
|
||||
xwinograd_en,underscore refer to,accuracy,0.5372043010752688
|
||||
xwinograd_en,median,accuracy,0.5225806451612903
|
||||
xwinograd_fr,Replace,accuracy,0.5060240963855421
|
||||
xwinograd_fr,True or False,accuracy,0.5421686746987951
|
||||
xwinograd_fr,does underscore refer to,accuracy,0.5542168674698795
|
||||
xwinograd_fr,stand for,accuracy,0.4819277108433735
|
||||
xwinograd_fr,underscore refer to,accuracy,0.5301204819277109
|
||||
xwinograd_fr,median,accuracy,0.5301204819277109
|
||||
xwinograd_pt,Replace,accuracy,0.5133079847908745
|
||||
xwinograd_pt,True or False,accuracy,0.4714828897338403
|
||||
xwinograd_pt,does underscore refer to,accuracy,0.5209125475285171
|
||||
xwinograd_pt,stand for,accuracy,0.5019011406844106
|
||||
xwinograd_pt,underscore refer to,accuracy,0.5399239543726235
|
||||
xwinograd_pt,median,accuracy,0.5133079847908745
|
||||
xwinograd_zh,Replace,accuracy,0.5257936507936508
|
||||
xwinograd_zh,True or False,accuracy,0.5297619047619048
|
||||
xwinograd_zh,does underscore refer to,accuracy,0.5218253968253969
|
||||
xwinograd_zh,stand for,accuracy,0.4444444444444444
|
||||
xwinograd_zh,underscore refer to,accuracy,0.5198412698412699
|
||||
xwinograd_zh,median,accuracy,0.5218253968253969
|
||||
multiple,average,multiple,0.5631550819200618
|
||||
|
1
evaluation_bloomz-7b1-p3/evaluation_l1/merged.json
Normal file
1
evaluation_bloomz-7b1-p3/evaluation_l1/merged.json
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "story_cloze",
|
||||
"dataset_config_name": "2016",
|
||||
"template_name": "Answer Given options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.8305718866916088
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "story_cloze",
|
||||
"dataset_config_name": "2016",
|
||||
"template_name": "Choose Story Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.8706574024585783
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "story_cloze",
|
||||
"dataset_config_name": "2016",
|
||||
"template_name": "Generate Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7183324425440941
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "story_cloze",
|
||||
"dataset_config_name": "2016",
|
||||
"template_name": "Novel Correct Ending",
|
||||
"evaluation": {
|
||||
"accuracy": 0.848743987172635
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "story_cloze",
|
||||
"dataset_config_name": "2016",
|
||||
"template_name": "Story Continuation and Options",
|
||||
"evaluation": {
|
||||
"accuracy": 0.8466060929983966
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='2016', dataset_name='story_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "cb",
|
||||
"template_name": "GPT-3 style",
|
||||
"evaluation": {
|
||||
"accuracy": 0.625
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "cb",
|
||||
"template_name": "MNLI crowdsource",
|
||||
"evaluation": {
|
||||
"accuracy": 0.08928571428571429
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "cb",
|
||||
"template_name": "can we infer",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5892857142857143
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "cb",
|
||||
"template_name": "guaranteed/possible/impossible",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "cb",
|
||||
"template_name": "justified in saying",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5357142857142857
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='cb', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "copa",
|
||||
"template_name": "C1 or C2? premise, so/because\u2026",
|
||||
"evaluation": {
|
||||
"accuracy": 0.66
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise, so/because\u2026', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "copa",
|
||||
"template_name": "C1 or C2? premise, so/because\u2026",
|
||||
"evaluation": {
|
||||
"accuracy": 0.66
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name=None, template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "copa",
|
||||
"template_name": "best_option",
|
||||
"evaluation": {
|
||||
"accuracy": 0.67
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "copa",
|
||||
"template_name": "cause_effect",
|
||||
"evaluation": {
|
||||
"accuracy": 0.78
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "copa",
|
||||
"template_name": "i_am_hesitating",
|
||||
"evaluation": {
|
||||
"accuracy": 0.8
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='i_am_hesitating', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "copa",
|
||||
"template_name": "plausible_alternatives",
|
||||
"evaluation": {
|
||||
"accuracy": 0.81
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='copa', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "rte",
|
||||
"template_name": "GPT-3 style",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7870036101083032
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "rte",
|
||||
"template_name": "MNLI crowdsource",
|
||||
"evaluation": {
|
||||
"accuracy": 0.7220216606498195
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "rte",
|
||||
"template_name": "does it follow that",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6678700361010831
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does it follow that', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "rte",
|
||||
"template_name": "guaranteed true",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6714801444043321
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='guaranteed true', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "super_glue",
|
||||
"dataset_config_name": "rte",
|
||||
"template_name": "should assume",
|
||||
"evaluation": {
|
||||
"accuracy": 0.6678700361010831
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='rte', dataset_name='super_glue', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='should assume', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "winogrande",
|
||||
"dataset_config_name": "winogrande_xl",
|
||||
"template_name": "Replace",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5406471981057617
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "winogrande",
|
||||
"dataset_config_name": "winogrande_xl",
|
||||
"template_name": "True or False",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5074980268350434
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "winogrande",
|
||||
"dataset_config_name": "winogrande_xl",
|
||||
"template_name": "does underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5177584846093133
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='does underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "winogrande",
|
||||
"dataset_config_name": "winogrande_xl",
|
||||
"template_name": "stand for",
|
||||
"evaluation": {
|
||||
"accuracy": 0.510655090765588
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "winogrande",
|
||||
"dataset_config_name": "winogrande_xl",
|
||||
"template_name": "underscore refer to",
|
||||
"evaluation": {
|
||||
"accuracy": 0.5256511444356748
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='winogrande_xl', dataset_name='winogrande', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "xcopa",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "C1 or C2? premise, so/because\u2026",
|
||||
"evaluation": {
|
||||
"accuracy": 0.47
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='so/because\u2026,validation', target_max_length=256, template_config_name='en', template_name='C1 or C2? premise', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "xcopa",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "best_option",
|
||||
"evaluation": {
|
||||
"accuracy": 0.51
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='best_option', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"dataset_name": "xcopa",
|
||||
"dataset_config_name": "id",
|
||||
"template_name": "cause_effect",
|
||||
"evaluation": {
|
||||
"accuracy": 0.65
|
||||
},
|
||||
"arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000', output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/6b3t0/p31lossseqglobal_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user