Model: CCCCCyx/Qwen3-8B-onpolicy-profiling-adam-20260403_091551 Source: Original Platform
8 lines
1.3 KiB
JSON
8 lines
1.3 KiB
JSON
["gpqa_diamond", {"avg_k": 0.5812182741116751, "pass_k": 0.7614213197969543, "avg_total_tokens": 10732.243654822336, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
|
["hmmt2025", {"avg_k": 0.375, "pass_k": 0.5333333333333333, "avg_total_tokens": 18450.008333333335, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
|
["aime2024", {"avg_k": 0.7020833333333333, "pass_k": 0.9333333333333333, "avg_total_tokens": 13840.913541666667, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
|
["aime2025", {"avg_k": 0.6010416666666667, "pass_k": 0.8666666666666667, "avg_total_tokens": 15299.15, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
|
["math500", {"avg_k": 0.952, "pass_k": 0.98, "avg_total_tokens": 4403.267, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
|
["minerva", {"avg_k": 0.484375, "pass_k": 0.5661764705882353, "avg_total_tokens": 6378.409007352941, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
|
["overall", {"avg_k": 0.7074036511156186, "pass_k": 0.8158640226628895, "avg_total_tokens": 9194.001521298174, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|