初始化项目,由ModelHub XC社区提供模型
Model: seopbo/rlvrmathif-qwen2.5-1.5b Source: Original Platform
This commit is contained in:
11
eval-results/gsm8k/metrics.json
Normal file
11
eval-results/gsm8k/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"gsm8k": {
|
||||
"pass@1": {
|
||||
"num_entries": 1319,
|
||||
"avg_tokens": 341,
|
||||
"gen_seconds": 69,
|
||||
"symbolic_correct": 77.8620166793025,
|
||||
"no_answer": 1.061410159211524
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/hendrycks_math/metrics.json
Normal file
11
eval-results/hendrycks_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"hendrycks_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 5000,
|
||||
"avg_tokens": 642,
|
||||
"gen_seconds": 165,
|
||||
"symbolic_correct": 55.06,
|
||||
"no_answer": 5.42
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/human-eval/metrics.json
Normal file
11
eval-results/human-eval/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"human-eval": {
|
||||
"pass@1": {
|
||||
"num_entries": 164,
|
||||
"avg_tokens": 109,
|
||||
"gen_seconds": 53,
|
||||
"passing_base_tests": 48.78048780487805,
|
||||
"passing_plus_tests": 42.073170731707314
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifbench/metrics.json
Normal file
16
eval-results/ifbench/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifbench": {
|
||||
"pass@1": {
|
||||
"num_prompts": 294,
|
||||
"num_instructions": 335,
|
||||
"average_score": 19.621027515483807,
|
||||
"prompt_strict_accuracy": 17.006802721088434,
|
||||
"instruction_strict_accuracy": 19.402985074626866,
|
||||
"prompt_loose_accuracy": 19.387755102040817,
|
||||
"instruction_loose_accuracy": 22.686567164179106,
|
||||
"num_entries": 294,
|
||||
"avg_tokens": 445,
|
||||
"gen_seconds": 53
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifeval/metrics.json
Normal file
16
eval-results/ifeval/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifeval": {
|
||||
"pass@1": {
|
||||
"num_prompts": 541,
|
||||
"num_instructions": 834,
|
||||
"average_score": 69.29170157404575,
|
||||
"prompt_strict_accuracy": 63.21626617375231,
|
||||
"instruction_strict_accuracy": 72.78177458033574,
|
||||
"prompt_loose_accuracy": 65.98890942698706,
|
||||
"instruction_loose_accuracy": 75.17985611510791,
|
||||
"num_entries": 541,
|
||||
"avg_tokens": 494,
|
||||
"gen_seconds": 60
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/mbpp/metrics.json
Normal file
11
eval-results/mbpp/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"mbpp": {
|
||||
"pass@1": {
|
||||
"num_entries": 378,
|
||||
"avg_tokens": 66,
|
||||
"gen_seconds": 53,
|
||||
"passing_base_tests": 61.111111111111114,
|
||||
"passing_plus_tests": 52.645502645502646
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/minerva_math/metrics.json
Normal file
11
eval-results/minerva_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"minerva_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 272,
|
||||
"avg_tokens": 633,
|
||||
"gen_seconds": 64,
|
||||
"symbolic_correct": 17.279411764705884,
|
||||
"no_answer": 4.779411764705882
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user