初始化项目,由ModelHub XC社区提供模型
Model: seopbo/zerorlvrif-qwen2.5-1.5b Source: Original Platform
This commit is contained in:
11
eval-results/gsm8k/metrics.json
Normal file
11
eval-results/gsm8k/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"gsm8k": {
|
||||
"pass@1": {
|
||||
"num_entries": 1319,
|
||||
"avg_tokens": 57,
|
||||
"gen_seconds": 49,
|
||||
"symbolic_correct": 9.476876421531463,
|
||||
"no_answer": 4.321455648218347
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/hendrycks_math/metrics.json
Normal file
11
eval-results/hendrycks_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"hendrycks_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 5000,
|
||||
"avg_tokens": 454,
|
||||
"gen_seconds": 136,
|
||||
"symbolic_correct": 11.86,
|
||||
"no_answer": 18.66
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/human-eval/metrics.json
Normal file
11
eval-results/human-eval/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"human-eval": {
|
||||
"pass@1": {
|
||||
"num_entries": 164,
|
||||
"avg_tokens": 904,
|
||||
"gen_seconds": 35,
|
||||
"passing_base_tests": 11.585365853658537,
|
||||
"passing_plus_tests": 10.365853658536585
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifbench/metrics.json
Normal file
16
eval-results/ifbench/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifbench": {
|
||||
"pass@1": {
|
||||
"num_prompts": 294,
|
||||
"num_instructions": 335,
|
||||
"average_score": 24.357041323992284,
|
||||
"prompt_strict_accuracy": 22.108843537414966,
|
||||
"instruction_strict_accuracy": 23.582089552238806,
|
||||
"prompt_loose_accuracy": 25.170068027210885,
|
||||
"instruction_loose_accuracy": 26.56716417910448,
|
||||
"num_entries": 294,
|
||||
"avg_tokens": 1242,
|
||||
"gen_seconds": 36
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifeval/metrics.json
Normal file
16
eval-results/ifeval/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifeval": {
|
||||
"pass@1": {
|
||||
"num_prompts": 541,
|
||||
"num_instructions": 834,
|
||||
"average_score": 41.75820157182941,
|
||||
"prompt_strict_accuracy": 34.56561922365989,
|
||||
"instruction_strict_accuracy": 47.00239808153477,
|
||||
"prompt_loose_accuracy": 36.78373382624769,
|
||||
"instruction_loose_accuracy": 48.6810551558753,
|
||||
"num_entries": 541,
|
||||
"avg_tokens": 883,
|
||||
"gen_seconds": 41
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/mbpp/metrics.json
Normal file
11
eval-results/mbpp/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"mbpp": {
|
||||
"pass@1": {
|
||||
"num_entries": 378,
|
||||
"avg_tokens": 781,
|
||||
"gen_seconds": 36,
|
||||
"passing_base_tests": 12.433862433862434,
|
||||
"passing_plus_tests": 10.846560846560847
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/minerva_math/metrics.json
Normal file
11
eval-results/minerva_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"minerva_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 272,
|
||||
"avg_tokens": 884,
|
||||
"gen_seconds": 37,
|
||||
"symbolic_correct": 3.676470588235294,
|
||||
"no_answer": 16.176470588235293
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user