Files

8 lines
1.3 KiB
Plaintext
Raw Permalink Normal View History

["gpqa_diamond", {"avg_k": 0.5774111675126904, "pass_k": 0.766497461928934, "avg_total_tokens": 10612.370558375635, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
["hmmt2025", {"avg_k": 0.31666666666666665, "pass_k": 0.43333333333333335, "avg_total_tokens": 18237.566666666666, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
["aime2024", {"avg_k": 0.6802083333333333, "pass_k": 0.9, "avg_total_tokens": 14426.277083333332, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
["aime2025", {"avg_k": 0.571875, "pass_k": 0.8666666666666667, "avg_total_tokens": 15452.192708333334, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
["math500", {"avg_k": 0.7485, "pass_k": 0.768, "avg_total_tokens": 4490.75, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
["minerva", {"avg_k": 0.3290441176470588, "pass_k": 0.38235294117647056, "avg_total_tokens": 6507.237132352941, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
["overall", {"avg_k": 0.6000676132521975, "pass_k": 0.6657223796033994, "avg_total_tokens": 9346.815584854632, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]