初始化项目,由ModelHub XC社区提供模型
Model: seopbo/rlvrcodemathif-qwen2.5-1.5b Source: Original Platform
This commit is contained in:
11
eval-results/gsm8k/metrics.json
Normal file
11
eval-results/gsm8k/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"gsm8k": {
|
||||
"pass@1": {
|
||||
"num_entries": 1319,
|
||||
"avg_tokens": 339,
|
||||
"gen_seconds": 75,
|
||||
"symbolic_correct": 77.93783169067476,
|
||||
"no_answer": 0.9097801364670205
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/hendrycks_math/metrics.json
Normal file
11
eval-results/hendrycks_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"hendrycks_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 5000,
|
||||
"avg_tokens": 648,
|
||||
"gen_seconds": 164,
|
||||
"symbolic_correct": 55.36,
|
||||
"no_answer": 5.88
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/human-eval/metrics.json
Normal file
11
eval-results/human-eval/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"human-eval": {
|
||||
"pass@1": {
|
||||
"num_entries": 164,
|
||||
"avg_tokens": 87,
|
||||
"gen_seconds": 41,
|
||||
"passing_base_tests": 54.26829268292683,
|
||||
"passing_plus_tests": 46.34146341463415
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifbench/metrics.json
Normal file
16
eval-results/ifbench/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifbench": {
|
||||
"pass@1": {
|
||||
"num_prompts": 294,
|
||||
"num_instructions": 335,
|
||||
"average_score": 20.429739059803026,
|
||||
"prompt_strict_accuracy": 17.687074829931973,
|
||||
"instruction_strict_accuracy": 19.701492537313435,
|
||||
"prompt_loose_accuracy": 20.74829931972789,
|
||||
"instruction_loose_accuracy": 23.582089552238806,
|
||||
"num_entries": 294,
|
||||
"avg_tokens": 423,
|
||||
"gen_seconds": 51
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifeval/metrics.json
Normal file
16
eval-results/ifeval/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifeval": {
|
||||
"pass@1": {
|
||||
"num_prompts": 541,
|
||||
"num_instructions": 834,
|
||||
"average_score": 71.13392465325336,
|
||||
"prompt_strict_accuracy": 65.06469500924214,
|
||||
"instruction_strict_accuracy": 74.10071942446042,
|
||||
"prompt_loose_accuracy": 68.39186691312385,
|
||||
"instruction_loose_accuracy": 76.97841726618705,
|
||||
"num_entries": 541,
|
||||
"avg_tokens": 480,
|
||||
"gen_seconds": 58
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/mbpp/metrics.json
Normal file
11
eval-results/mbpp/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"mbpp": {
|
||||
"pass@1": {
|
||||
"num_entries": 378,
|
||||
"avg_tokens": 60,
|
||||
"gen_seconds": 51,
|
||||
"passing_base_tests": 67.46031746031746,
|
||||
"passing_plus_tests": 57.407407407407405
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/minerva_math/metrics.json
Normal file
11
eval-results/minerva_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"minerva_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 272,
|
||||
"avg_tokens": 676,
|
||||
"gen_seconds": 58,
|
||||
"symbolic_correct": 17.279411764705884,
|
||||
"no_answer": 6.985294117647059
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user