初始化项目,由ModelHub XC社区提供模型
Model: seopbo/zerorlvrcode-qwen2.5-1.5b Source: Original Platform
This commit is contained in:
11
eval-results/gsm8k/metrics.json
Normal file
11
eval-results/gsm8k/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"gsm8k": {
|
||||
"pass@1": {
|
||||
"num_entries": 1319,
|
||||
"avg_tokens": 1586,
|
||||
"gen_seconds": 105,
|
||||
"symbolic_correct": 0.1516300227445034,
|
||||
"no_answer": 5.231235784685368
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/hendrycks_math/metrics.json
Normal file
11
eval-results/hendrycks_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"hendrycks_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 5000,
|
||||
"avg_tokens": 1118,
|
||||
"gen_seconds": 192,
|
||||
"symbolic_correct": 2.58,
|
||||
"no_answer": 12.28
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/human-eval/metrics.json
Normal file
11
eval-results/human-eval/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"human-eval": {
|
||||
"pass@1": {
|
||||
"num_entries": 164,
|
||||
"avg_tokens": 427,
|
||||
"gen_seconds": 34,
|
||||
"passing_base_tests": 28.658536585365855,
|
||||
"passing_plus_tests": 26.21951219512195
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifbench/metrics.json
Normal file
16
eval-results/ifbench/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifbench": {
|
||||
"pass@1": {
|
||||
"num_prompts": 294,
|
||||
"num_instructions": 335,
|
||||
"average_score": 20.91912884556808,
|
||||
"prompt_strict_accuracy": 18.70748299319728,
|
||||
"instruction_strict_accuracy": 20.8955223880597,
|
||||
"prompt_loose_accuracy": 21.08843537414966,
|
||||
"instruction_loose_accuracy": 22.98507462686567,
|
||||
"num_entries": 294,
|
||||
"avg_tokens": 1612,
|
||||
"gen_seconds": 37
|
||||
}
|
||||
}
|
||||
}
|
||||
16
eval-results/ifeval/metrics.json
Normal file
16
eval-results/ifeval/metrics.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"ifeval": {
|
||||
"pass@1": {
|
||||
"num_prompts": 541,
|
||||
"num_instructions": 834,
|
||||
"average_score": 27.84218096871855,
|
||||
"prompt_strict_accuracy": 20.51756007393715,
|
||||
"instruction_strict_accuracy": 32.49400479616307,
|
||||
"prompt_loose_accuracy": 23.10536044362292,
|
||||
"instruction_loose_accuracy": 35.25179856115108,
|
||||
"num_entries": 541,
|
||||
"avg_tokens": 1555,
|
||||
"gen_seconds": 77
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/mbpp/metrics.json
Normal file
11
eval-results/mbpp/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"mbpp": {
|
||||
"pass@1": {
|
||||
"num_entries": 378,
|
||||
"avg_tokens": 150,
|
||||
"gen_seconds": 35,
|
||||
"passing_base_tests": 59.78835978835979,
|
||||
"passing_plus_tests": 52.116402116402114
|
||||
}
|
||||
}
|
||||
}
|
||||
11
eval-results/minerva_math/metrics.json
Normal file
11
eval-results/minerva_math/metrics.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"minerva_math": {
|
||||
"pass@1": {
|
||||
"num_entries": 272,
|
||||
"avg_tokens": 1813,
|
||||
"gen_seconds": 64,
|
||||
"symbolic_correct": 0.0,
|
||||
"no_answer": 10.661764705882353
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user