Files
rlvrmulti-qwen2.5-1.5b/eval-results/ifbench/metrics.json

16 lines
433 B
JSON
Raw Normal View History

{
"ifbench": {
"pass@1": {
"num_prompts": 294,
"num_instructions": 335,
"average_score": 19.80150269062849,
"prompt_strict_accuracy": 16.666666666666664,
"instruction_strict_accuracy": 18.507462686567163,
"prompt_loose_accuracy": 20.74829931972789,
"instruction_loose_accuracy": 23.283582089552237,
"num_entries": 294,
"avg_tokens": 495,
"gen_seconds": 23
}
}
}