119 lines
2.3 KiB
JSON
119 lines
2.3 KiB
JSON
{
|
|
"amc-cot": {
|
|
"cot": {
|
|
"accuracy": 0.05,
|
|
"n_samples": 40
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"asdiv-cot": {
|
|
"cot": {
|
|
"accuracy": 0.8446952595936794,
|
|
"n_samples": 2215
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"gsm8k-cot": {
|
|
"cot": {
|
|
"accuracy": 0.7740712661106899,
|
|
"n_samples": 1319
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"math-500-cot": {
|
|
"cot": {
|
|
"accuracy": 0.426,
|
|
"n_samples": 500
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"math-cot": {
|
|
"cot": {
|
|
"accuracy": 0.4482,
|
|
"n_samples": 5000
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"math_sat-cot": {
|
|
"cot": {
|
|
"accuracy": 0.875,
|
|
"n_samples": 32
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"mathqa-cot": {
|
|
"cot": {
|
|
"accuracy": 0.605,
|
|
"n_samples": 1000
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"mawps-cot": {
|
|
"cot": {
|
|
"accuracy": 0.9598062953995158,
|
|
"n_samples": 2065
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"mmlu-stem-cot": {
|
|
"cot": {
|
|
"accuracy": 0.6408217362491716,
|
|
"n_samples": 3018
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"ocw-courses-cot": {
|
|
"cot": {
|
|
"accuracy": 0.15073529411764705,
|
|
"n_samples": 272
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"olympiad-bench-cot": {
|
|
"cot": {
|
|
"accuracy": 0.047407407407407405,
|
|
"n_samples": 675
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"svamp-cot": {
|
|
"cot": {
|
|
"accuracy": 0.861,
|
|
"n_samples": 1000
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
},
|
|
"tabmwp-cot": {
|
|
"cot": {
|
|
"accuracy": 0.689,
|
|
"n_samples": 1000
|
|
},
|
|
"tool": {
|
|
"n_samples": 0
|
|
}
|
|
}
|
|
} |