初始化项目,由ModelHub XC社区提供模型
Model: seopbo/zerorlvrmath-qwen2.5-1.5b Source: Original Platform
This commit is contained in:
11
eval-results/gsm8k/metrics.json
Normal file
11
eval-results/gsm8k/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"gsm8k": {
|
||||
"pass@1": {
|
||||
"num_entries": 1319,
|
||||
"avg_tokens": 408,
|
||||
"gen_seconds": 60,
|
||||
"symbolic_correct": 70.35633055344958,
|
||||
"no_answer": 0.9855951478392722
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/hendrycks_math/metrics.json
Normal file
11
eval-results/hendrycks_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"hendrycks_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 5000,
|
||||
"avg_tokens": 672,
|
||||
"gen_seconds": 142,
|
||||
"symbolic_correct": 53.22,
|
||||
"no_answer": 7.62
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/human-eval/metrics.json
Normal file
11
eval-results/human-eval/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"human-eval": {
|
||||
"pass@1": {
|
||||
"num_entries": 164,
|
||||
"avg_tokens": 430,
|
||||
"gen_seconds": 34,
|
||||
"passing_base_tests": 31.70731707317073,
|
||||
"passing_plus_tests": 25.609756097560975
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifbench/metrics.json
Normal file
16
eval-results/ifbench/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifbench": {
|
||||
"pass@1": {
|
||||
"num_prompts": 294,
|
||||
"num_instructions": 335,
|
||||
"average_score": 19.546400649812163,
|
||||
"prompt_strict_accuracy": 16.666666666666664,
|
||||
"instruction_strict_accuracy": 19.402985074626866,
|
||||
"prompt_loose_accuracy": 19.727891156462583,
|
||||
"instruction_loose_accuracy": 22.388059701492537,
|
||||
"num_entries": 294,
|
||||
"avg_tokens": 1881,
|
||||
"gen_seconds": 37
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifeval/metrics.json
Normal file
16
eval-results/ifeval/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifeval": {
|
||||
"pass@1": {
|
||||
"num_prompts": 541,
|
||||
"num_instructions": 834,
|
||||
"average_score": 31.078261235743383,
|
||||
"prompt_strict_accuracy": 23.65988909426987,
|
||||
"instruction_strict_accuracy": 35.4916067146283,
|
||||
"prompt_loose_accuracy": 26.432532347504623,
|
||||
"instruction_loose_accuracy": 38.72901678657074,
|
||||
"num_entries": 541,
|
||||
"avg_tokens": 1762,
|
||||
"gen_seconds": 55
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/mbpp/metrics.json
Normal file
11
eval-results/mbpp/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"mbpp": {
|
||||
"pass@1": {
|
||||
"num_entries": 378,
|
||||
"avg_tokens": 321,
|
||||
"gen_seconds": 34,
|
||||
"passing_base_tests": 15.343915343915343,
|
||||
"passing_plus_tests": 13.756613756613756
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/minerva_math/metrics.json
Normal file
11
eval-results/minerva_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"minerva_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 272,
|
||||
"avg_tokens": 1270,
|
||||
"gen_seconds": 37,
|
||||
"symbolic_correct": 12.132352941176471,
|
||||
"no_answer": 12.867647058823529
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user