56 lines
1.4 KiB
JSON
56 lines
1.4 KiB
JSON
|
|
{
|
||
|
|
"results": {
|
||
|
|
"arc_easy": {
|
||
|
|
"acc": 0.43813131313131315,
|
||
|
|
"acc_stderr": 0.010180937100600052,
|
||
|
|
"acc_norm": 0.4019360269360269,
|
||
|
|
"acc_norm_stderr": 0.010060521220920566
|
||
|
|
},
|
||
|
|
"boolq": {
|
||
|
|
"acc": 0.617737003058104,
|
||
|
|
"acc_stderr": 0.00849914969044927
|
||
|
|
},
|
||
|
|
"lambada_openai": {
|
||
|
|
"ppl": 64.94966274873535,
|
||
|
|
"ppl_stderr": 2.5466406639926897,
|
||
|
|
"acc": 0.26470017465554047,
|
||
|
|
"acc_stderr": 0.006146408462993569
|
||
|
|
},
|
||
|
|
"openbookqa": {
|
||
|
|
"acc": 0.166,
|
||
|
|
"acc_stderr": 0.016656616876531142,
|
||
|
|
"acc_norm": 0.28,
|
||
|
|
"acc_norm_stderr": 0.020099950647503237
|
||
|
|
},
|
||
|
|
"piqa": {
|
||
|
|
"acc": 0.5973884657236126,
|
||
|
|
"acc_stderr": 0.011442395233488702,
|
||
|
|
"acc_norm": 0.6088139281828074,
|
||
|
|
"acc_norm_stderr": 0.0113862156067287
|
||
|
|
},
|
||
|
|
"winogrande": {
|
||
|
|
"acc": 0.5098658247829518,
|
||
|
|
"acc_stderr": 0.014049749833367589
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"versions": {
|
||
|
|
"arc_easy": 0,
|
||
|
|
"boolq": 1,
|
||
|
|
"lambada_openai": 0,
|
||
|
|
"openbookqa": 0,
|
||
|
|
"piqa": 0,
|
||
|
|
"winogrande": 0
|
||
|
|
},
|
||
|
|
"config": {
|
||
|
|
"model": "hf-causal-experimental",
|
||
|
|
"model_args": "pretrained=BEE-spoke-data/smol_llama-220M-GQA,revision=main,trust_remote_code=True,dtype='bfloat16'",
|
||
|
|
"num_fewshot": 0,
|
||
|
|
"batch_size": "8",
|
||
|
|
"batch_sizes": [],
|
||
|
|
"device": "cuda",
|
||
|
|
"no_cache": false,
|
||
|
|
"limit": null,
|
||
|
|
"bootstrap_iters": 100000,
|
||
|
|
"description_dict": {}
|
||
|
|
}
|
||
|
|
}
|