初始化项目,由ModelHub XC社区提供模型
Model: CCCCCyx/Qwen3-8B-onpolicy-profiling-muon-20260413_090005 Source: Original Platform
This commit is contained in:
7
eval_20260417051759/step03_final_eval.jsonl
Normal file
7
eval_20260417051759/step03_final_eval.jsonl
Normal file
@@ -0,0 +1,7 @@
|
||||
["gpqa_diamond", {"avg_k": 0.5774111675126904, "pass_k": 0.766497461928934, "avg_total_tokens": 10612.370558375635, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
||||
["hmmt2025", {"avg_k": 0.31666666666666665, "pass_k": 0.43333333333333335, "avg_total_tokens": 18237.566666666666, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
||||
["aime2024", {"avg_k": 0.6802083333333333, "pass_k": 0.9, "avg_total_tokens": 14426.277083333332, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
||||
["aime2025", {"avg_k": 0.571875, "pass_k": 0.8666666666666667, "avg_total_tokens": 15452.192708333334, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
||||
["math500", {"avg_k": 0.7485, "pass_k": 0.768, "avg_total_tokens": 4490.75, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
||||
["minerva", {"avg_k": 0.3290441176470588, "pass_k": 0.38235294117647056, "avg_total_tokens": 6507.237132352941, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
||||
["overall", {"avg_k": 0.6000676132521975, "pass_k": 0.6657223796033994, "avg_total_tokens": 9346.815584854632, "avg_thinking_tokens": 0.0, "max_thinking_tokens": 0.0, "min_thinking_tokens": 0.0}]
|
||||
Reference in New Issue
Block a user