["gpqa_diamond", {"avg_k": 0.5812182741116751, "pass_k": 0.7614213197969543, "avg_total_tokens": 10732.243654822336, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}] ["hmmt2025", {"avg_k": 0.375, "pass_k": 0.5333333333333333, "avg_total_tokens": 18450.008333333335, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}] ["aime2024", {"avg_k": 0.7020833333333333, "pass_k": 0.9333333333333333, "avg_total_tokens": 13840.913541666667, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}] ["aime2025", {"avg_k": 0.6010416666666667, "pass_k": 0.8666666666666667, "avg_total_tokens": 15299.15, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}] ["math500", {"avg_k": 0.952, "pass_k": 0.98, "avg_total_tokens": 4403.267, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}] ["minerva", {"avg_k": 0.484375, "pass_k": 0.5661764705882353, "avg_total_tokens": 6378.409007352941, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}] ["overall", {"avg_k": 0.7074036511156186, "pass_k": 0.8158640226628895, "avg_total_tokens": 9194.001521298174, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]