初始化项目,由ModelHub XC社区提供模型
Model: seopbo/rlvrmath-qwen2.5-1.5b Source: Original Platform
This commit is contained in:
11
eval-results/gsm8k/metrics.json
Normal file
11
eval-results/gsm8k/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"gsm8k": {
|
||||
"pass@1": {
|
||||
"num_entries": 1319,
|
||||
"avg_tokens": 351,
|
||||
"gen_seconds": 32,
|
||||
"symbolic_correct": 77.48294162244125,
|
||||
"no_answer": 1.7437452615617892
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/hendrycks_math/metrics.json
Normal file
11
eval-results/hendrycks_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"hendrycks_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 5000,
|
||||
"avg_tokens": 663,
|
||||
"gen_seconds": 117,
|
||||
"symbolic_correct": 54.56,
|
||||
"no_answer": 6.94
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/human-eval/metrics.json
Normal file
11
eval-results/human-eval/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"human-eval": {
|
||||
"pass@1": {
|
||||
"num_entries": 164,
|
||||
"avg_tokens": 105,
|
||||
"gen_seconds": 26,
|
||||
"passing_base_tests": 48.170731707317074,
|
||||
"passing_plus_tests": 42.073170731707314
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifbench/metrics.json
Normal file
16
eval-results/ifbench/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifbench": {
|
||||
"pass@1": {
|
||||
"num_prompts": 294,
|
||||
"num_instructions": 335,
|
||||
"average_score": 13.37166209767489,
|
||||
"prompt_strict_accuracy": 10.54421768707483,
|
||||
"instruction_strict_accuracy": 11.641791044776118,
|
||||
"prompt_loose_accuracy": 14.285714285714285,
|
||||
"instruction_loose_accuracy": 17.01492537313433,
|
||||
"num_entries": 294,
|
||||
"avg_tokens": 429,
|
||||
"gen_seconds": 27
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifeval/metrics.json
Normal file
16
eval-results/ifeval/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifeval": {
|
||||
"pass@1": {
|
||||
"num_prompts": 541,
|
||||
"num_instructions": 834,
|
||||
"average_score": 54.65005075422103,
|
||||
"prompt_strict_accuracy": 47.874306839186694,
|
||||
"instruction_strict_accuracy": 58.39328537170264,
|
||||
"prompt_loose_accuracy": 50.46210720887245,
|
||||
"instruction_loose_accuracy": 61.87050359712231,
|
||||
"num_entries": 541,
|
||||
"avg_tokens": 472,
|
||||
"gen_seconds": 30
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/mbpp/metrics.json
Normal file
11
eval-results/mbpp/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"mbpp": {
|
||||
"pass@1": {
|
||||
"num_entries": 378,
|
||||
"avg_tokens": 67,
|
||||
"gen_seconds": 27,
|
||||
"passing_base_tests": 63.22751322751323,
|
||||
"passing_plus_tests": 54.4973544973545
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/minerva_math/metrics.json
Normal file
11
eval-results/minerva_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"minerva_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 272,
|
||||
"avg_tokens": 641,
|
||||
"gen_seconds": 27,
|
||||
"symbolic_correct": 19.11764705882353,
|
||||
"no_answer": 4.411764705882353
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user