{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.44921875, "calib/ece": 0.16135714285714264, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.12215151515151501, "calib/mean_conf": 0.9256428571428571, "calib/mu_c": 0.9518181818181817, "calib/mu_w": 0.8296666666666667, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.15064285714285694, "calib/std_conf": 0.1237495207164497, "calib/step_conf_rate": 0.078125, "calib/step_q_c": 0.8832727272727273, "calib/step_q_c_n": 55.0, "calib/step_q_gap": 0.042872727272727285, "calib/step_q_w": 0.8404, "calib/step_q_w_n": 60.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 2909.0, "completions/max_terminated_length": 2909.0, "completions/mean_length": 579.68359375, "completions/mean_terminated_length": 648.0305786132812, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0010666666666666667, "grad_norm": 0.6144071817398071, "learning_rate": 0.0, "loss": -0.0112, "num_tokens": 255983.0, "reward": 0.068359375, "reward_std": 0.14980050921440125, "rewards/accuracy_reward_step": 0.04296875, "rewards/format_reward_step": 0.05078125, "step": 1 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.6484615384615385, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.9230769230769231, "calib/gap": 0.016388888888888786, "calib/mean_conf": 0.9561538461538462, "calib/mu_c": 0.9674999999999999, "calib/mu_w": 0.9511111111111111, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.6484615384615385, "calib/std_conf": 0.024663414679817527, "calib/step_conf_rate": 0.06640625, "calib/step_q_c": 0.8756521739130435, "calib/step_q_c_n": 23.0, "calib/step_q_gap": 0.015072463768115885, "calib/step_q_w": 0.8605797101449276, "calib/step_q_w_n": 69.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 785.5390625, "completions/mean_terminated_length": 827.5637817382812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.46494486927986145, "learning_rate": 2.5000000000000004e-07, "loss": -0.0007, "num_tokens": 560369.0, "reward": 0.0390625, "reward_std": 0.0974610224366188, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.046875, "step": 2 }, { "calib/answer_extract_rate": 0.03515625, "calib/avg_num_step_conf": 0.16796875, "calib/ece": 0.52625, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": -0.12750000000000017, "calib/mean_conf": 0.90125, "calib/mu_c": 0.8374999999999999, "calib/mu_w": 0.9650000000000001, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.46375, "calib/std_conf": 0.1540647185438639, "calib/step_conf_rate": 0.02734375, "calib/step_q_c": 0.8846666666666667, "calib/step_q_c_n": 15.0, "calib/step_q_gap": 0.11323809523809536, "calib/step_q_w": 0.7714285714285714, "calib/step_q_w_n": 28.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 700.59765625, "completions/mean_terminated_length": 769.75537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.32013899087905884, "learning_rate": 5.000000000000001e-07, "loss": 0.0059, "num_tokens": 844978.0, "reward": 0.025390625, "reward_std": 0.07181552797555923, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.01953125, "step": 3 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.0234375, "calib/ece": 0.9333333333333333, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/mean_conf": 0.9333333333333332, "calib/mu_c": NaN, "calib/mu_w": 0.9333333333333332, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.9333333333333333, "calib/std_conf": 0.06523461930260308, "calib/step_conf_rate": 0.00390625, "calib/step_q_w": 0.8500000000000001, "calib/step_q_w_n": 6.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 701.765625, "completions/mean_terminated_length": 787.9473876953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.044668663293123245, "learning_rate": 7.5e-07, "loss": 0.0007, "num_tokens": 1130798.0, "reward": 0.001953125, "reward_std": 0.005524271633476019, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "step": 4 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.26953125, "calib/ece": 0.5896428571428574, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.9285714285714286, "calib/gap": 0.0454444444444444, "calib/mean_conf": 0.9467857142857143, "calib/mu_c": 0.976, "calib/mu_w": 0.9305555555555556, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.5896428571428574, "calib/std_conf": 0.08433790756866821, "calib/step_conf_rate": 0.05078125, "calib/step_q_c": 0.8807142857142859, "calib/step_q_c_n": 14.0, "calib/step_q_gap": -0.013376623376623153, "calib/step_q_w": 0.894090909090909, "calib/step_q_w_n": 55.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 699.68359375, "completions/mean_terminated_length": 762.20849609375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.7100959420204163, "learning_rate": 1.0000000000000002e-06, "loss": 0.0069, "num_tokens": 1416605.0, "reward": 0.04296875, "reward_std": 0.0989597737789154, "rewards/accuracy_reward_step": 0.01953125, "rewards/format_reward_step": 0.046875, "step": 5 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.25390625, "calib/ece": 0.6266666666666667, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": 0.00374999999999992, "calib/mean_conf": 0.9600000000000001, "calib/mu_c": 0.9624999999999999, "calib/mu_w": 0.95875, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.6266666666666667, "calib/std_conf": 0.03415650255319866, "calib/step_conf_rate": 0.05078125, "calib/step_q_c": 0.7994444444444445, "calib/step_q_c_n": 18.0, "calib/step_q_gap": -0.07119385342789597, "calib/step_q_w": 0.8706382978723405, "calib/step_q_w_n": 47.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 611.203125, "completions/mean_terminated_length": 674.4310302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.41114652156829834, "learning_rate": 1.25e-06, "loss": -0.0202, "num_tokens": 1679025.0, "reward": 0.037109375, "reward_std": 0.09193675220012665, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.04296875, "step": 6 }, { "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.28515625, "calib/ece": 0.6388888888888888, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": 0.01666666666666672, "calib/mean_conf": 0.9722222222222222, "calib/mu_c": 0.9833333333333334, "calib/mu_w": 0.9666666666666667, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6388888888888888, "calib/std_conf": 0.029355210696939787, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.9376666666666664, "calib/step_q_c_n": 30.0, "calib/step_q_gap": 0.04092248062015491, "calib/step_q_w": 0.8967441860465115, "calib/step_q_w_n": 43.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 690.18359375, "completions/mean_terminated_length": 778.3568115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.3305240869522095, "learning_rate": 1.5e-06, "loss": -0.0039, "num_tokens": 1963136.0, "reward": 0.025390625, "reward_std": 0.0679999589920044, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.02734375, "step": 7 }, { "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.21484375, "calib/ece": 0.4277777777777778, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": -0.010000000000000009, "calib/mean_conf": 0.9544444444444444, "calib/mu_c": 0.95, "calib/mu_w": 0.96, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.4133333333333334, "calib/std_conf": 0.037151674438445526, "calib/step_conf_rate": 0.0234375, "calib/step_q_c": 0.8305555555555555, "calib/step_q_c_n": 18.0, "calib/step_q_gap": 0.07939879879879874, "calib/step_q_w": 0.7511567567567567, "calib/step_q_w_n": 37.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2928.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 646.625, "completions/mean_terminated_length": 719.7216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.33451029658317566, "learning_rate": 1.75e-06, "loss": -0.0008, "num_tokens": 2235184.0, "reward": 0.029296875, "reward_std": 0.0774708092212677, "rewards/accuracy_reward_step": 0.01953125, "rewards/format_reward_step": 0.01953125, "step": 8 }, { "calib/answer_extract_rate": 0.05859375, "calib/avg_num_step_conf": 0.34375, "calib/ece": 0.4984615384615386, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9230769230769231, "calib/gap": 0.040238095238095295, "calib/mean_conf": 0.9600000000000002, "calib/mu_c": 0.9816666666666668, "calib/mu_w": 0.9414285714285715, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.4984615384615386, "calib/std_conf": 0.025720389995846898, "calib/step_conf_rate": 0.078125, "calib/step_q_c": 0.9084375, "calib/step_q_c_n": 32.0, "calib/step_q_gap": 0.04022321428571429, "calib/step_q_w": 0.8682142857142857, "calib/step_q_w_n": 56.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 652.80078125, "completions/mean_terminated_length": 696.3208618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.47666192054748535, "learning_rate": 2.0000000000000003e-06, "loss": 0.0229, "num_tokens": 2509837.0, "reward": 0.046875, "reward_std": 0.1082572191953659, "rewards/accuracy_reward_step": 0.02734375, "rewards/format_reward_step": 0.0390625, "step": 9 }, { "calib/answer_extract_rate": 0.07421875, "calib/avg_num_step_conf": 0.27734375, "calib/ece": 0.4323529411764707, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0033333333333334103, "calib/mean_conf": 0.9617647058823531, "calib/mu_c": 0.9633333333333334, "calib/mu_w": 0.96, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.4323529411764707, "calib/std_conf": 0.01854574443580525, "calib/step_conf_rate": 0.0625, "calib/step_q_c": 0.8643589743589746, "calib/step_q_c_n": 39.0, "calib/step_q_gap": 0.015296474358974521, "calib/step_q_w": 0.8490625, "calib/step_q_w_n": 32.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 610.796875, "completions/mean_terminated_length": 673.9827270507812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.35018742084503174, "learning_rate": 2.25e-06, "loss": -0.0116, "num_tokens": 2773001.0, "reward": 0.0625, "reward_std": 0.09454704821109772, "rewards/accuracy_reward_step": 0.03515625, "rewards/format_reward_step": 0.0546875, "step": 10 }, { "calib/answer_extract_rate": 0.1015625, "calib/avg_num_step_conf": 0.62109375, "calib/ece": 0.52276, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.96, "calib/gap": -0.0033051948051948665, "calib/mean_conf": 0.96276, "calib/mu_c": 0.9609090909090909, "calib/mu_w": 0.9642142857142858, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.10546875, "calib/pce": 0.52276, "calib/std_conf": 0.027734858932397695, "calib/step_conf_rate": 0.10546875, "calib/step_q_c": 0.8749122807017544, "calib/step_q_c_n": 57.0, "calib/step_q_gap": 0.014226006191950247, "calib/step_q_w": 0.8606862745098042, "calib/step_q_w_n": 102.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 690.19140625, "completions/mean_terminated_length": 739.2844848632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.3576546311378479, "learning_rate": 2.5e-06, "loss": -0.0108, "num_tokens": 3054170.0, "reward": 0.08203125, "reward_std": 0.18240919709205627, "rewards/accuracy_reward_step": 0.04296875, "rewards/format_reward_step": 0.078125, "step": 11 }, { "calib/answer_extract_rate": 0.17578125, "calib/avg_num_step_conf": 1.23828125, "calib/ece": 0.5485106382978725, "calib/final_conf_rate": 0.18359375, "calib/format_rate": 0.1484375, "calib/frac_conf_gt_0.9": 0.8723404255319149, "calib/gap": -0.03720370370370363, "calib/mean_conf": 0.937872340425532, "calib/mu_c": 0.9165000000000001, "calib/mu_w": 0.9537037037037037, "calib/nonempty_final_conf_rate": 0.18359375, "calib/nonempty_reasoning_rate": 0.20703125, "calib/nonempty_step_conf_rate": 0.1875, "calib/pce": 0.5304255319148937, "calib/std_conf": 0.12163664359367686, "calib/step_conf_rate": 0.1875, "calib/step_q_c": 0.8323376623376624, "calib/step_q_c_n": 154.0, "calib/step_q_gap": -0.035331049318779395, "calib/step_q_w": 0.8676687116564418, "calib/step_q_w_n": 163.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2946.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 557.2578125, "completions/mean_terminated_length": 582.2775268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0128, "grad_norm": 0.4705829620361328, "learning_rate": 2.7500000000000004e-06, "loss": -0.0058, "num_tokens": 3301004.0, "reward": 0.15234375, "reward_std": 0.30028218030929565, "rewards/accuracy_reward_step": 0.078125, "rewards/format_reward_step": 0.1484375, "step": 12 }, { "calib/answer_extract_rate": 0.2109375, "calib/avg_num_step_conf": 1.44140625, "calib/ece": 0.4812499999999999, "calib/final_conf_rate": 0.203125, "calib/format_rate": 0.1640625, "calib/frac_conf_gt_0.9": 0.8461538461538461, "calib/gap": -0.03523703703703718, "calib/mean_conf": 0.9350961538461535, "calib/mu_c": 0.9168, "calib/mu_w": 0.9520370370370371, "calib/nonempty_final_conf_rate": 0.203125, "calib/nonempty_reasoning_rate": 0.2578125, "calib/nonempty_step_conf_rate": 0.23828125, "calib/pce": 0.46778846153846143, "calib/std_conf": 0.09902160984338888, "calib/step_conf_rate": 0.23828125, "calib/step_q_c": 0.8649668874172186, "calib/step_q_c_n": 151.0, "calib/step_q_gap": -0.019712011665350082, "calib/step_q_w": 0.8846788990825687, "calib/step_q_w_n": 218.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 631.0546875, "completions/mean_terminated_length": 684.5338745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.013866666666666666, "grad_norm": 0.508955180644989, "learning_rate": 3e-06, "loss": -0.0062, "num_tokens": 3567146.0, "reward": 0.18359375, "reward_std": 0.2880544066429138, "rewards/accuracy_reward_step": 0.1015625, "rewards/format_reward_step": 0.1640625, "step": 13 }, { "calib/answer_extract_rate": 0.37109375, "calib/avg_num_step_conf": 1.9453125, "calib/ece": 0.5424275362318839, "calib/final_conf_rate": 0.359375, "calib/format_rate": 0.28125, "calib/frac_conf_gt_0.9": 0.9130434782608695, "calib/gap": 0.023949221949222, "calib/mean_conf": 0.9446014492753624, "calib/mu_c": 0.9589189189189188, "calib/mu_w": 0.9349696969696968, "calib/nonempty_final_conf_rate": 0.359375, "calib/nonempty_reasoning_rate": 0.3984375, "calib/nonempty_step_conf_rate": 0.33203125, "calib/pce": 0.5424275362318839, "calib/std_conf": 0.08500616969837242, "calib/step_conf_rate": 0.33203125, "calib/step_q_c": 0.8756976744186047, "calib/step_q_c_n": 172.0, "calib/step_q_gap": 0.018601559899177222, "calib/step_q_w": 0.8570961145194275, "calib/step_q_w_n": 326.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2916.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 531.89453125, "completions/mean_terminated_length": 553.5162353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.014933333333333333, "grad_norm": 0.6900785565376282, "learning_rate": 3.2500000000000002e-06, "loss": -0.0625, "num_tokens": 3808711.0, "reward": 0.28515625, "reward_std": 0.34519943594932556, "rewards/accuracy_reward_step": 0.14453125, "rewards/format_reward_step": 0.28125, "step": 14 }, { "calib/answer_extract_rate": 0.56640625, "calib/avg_num_step_conf": 3.875, "calib/ece": 0.5345314685314685, "calib/final_conf_rate": 0.55859375, "calib/format_rate": 0.4921875, "calib/frac_conf_gt_0.9": 0.8741258741258742, "calib/gap": 0.011973970944310075, "calib/mean_conf": 0.947118881118881, "calib/mu_c": 0.9541525423728814, "calib/mu_w": 0.9421785714285713, "calib/nonempty_final_conf_rate": 0.55859375, "calib/nonempty_reasoning_rate": 0.63671875, "calib/nonempty_step_conf_rate": 0.5859375, "calib/pce": 0.5345314685314685, "calib/std_conf": 0.0888806451600279, "calib/step_conf_rate": 0.5859375, "calib/step_q_c": 0.8749010695187166, "calib/step_q_c_n": 374.0, "calib/step_q_gap": 0.0007360209750272295, "calib/step_q_w": 0.8741650485436894, "calib/step_q_w_n": 618.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 522.0625, "completions/mean_terminated_length": 532.462158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.016, "grad_norm": 0.9986044764518738, "learning_rate": 3.5e-06, "loss": -0.0452, "num_tokens": 4050239.0, "reward": 0.48046875, "reward_std": 0.4949692487716675, "rewards/accuracy_reward_step": 0.234375, "rewards/format_reward_step": 0.4921875, "step": 15 }, { "calib/answer_extract_rate": 0.68359375, "calib/avg_num_step_conf": 5.328125, "calib/ece": 0.5159766081871344, "calib/final_conf_rate": 0.66796875, "calib/format_rate": 0.609375, "calib/frac_conf_gt_0.9": 0.9239766081871345, "calib/gap": 0.00936842105263147, "calib/mean_conf": 0.9557426900584794, "calib/mu_c": 0.9609473684210526, "calib/mu_w": 0.9515789473684211, "calib/nonempty_final_conf_rate": 0.66796875, "calib/nonempty_reasoning_rate": 0.76953125, "calib/nonempty_step_conf_rate": 0.734375, "calib/pce": 0.5136374269005847, "calib/std_conf": 0.0471109601210145, "calib/step_conf_rate": 0.734375, "calib/step_q_c": 0.8763611615245009, "calib/step_q_c_n": 551.0, "calib/step_q_gap": -0.0035773378604930794, "calib/step_q_w": 0.879938499384994, "calib/step_q_w_n": 813.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 500.1796875, "completions/mean_terminated_length": 510.1434326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.017066666666666667, "grad_norm": 0.8285671472549438, "learning_rate": 3.7500000000000005e-06, "loss": -0.0197, "num_tokens": 4287133.0, "reward": 0.6171875, "reward_std": 0.49173909425735474, "rewards/accuracy_reward_step": 0.3125, "rewards/format_reward_step": 0.609375, "step": 16 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 6.94140625, "calib/ece": 0.4604086956521738, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.9130434782608695, "calib/gap": 0.006691167574107593, "calib/mean_conf": 0.9517130434782608, "calib/mu_c": 0.9550877192982455, "calib/mu_w": 0.948396551724138, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.45823478260869555, "calib/std_conf": 0.0747794165142755, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.879920993227991, "calib/step_q_c_n": 886.0, "calib/step_q_gap": 0.01322290119656555, "calib/step_q_w": 0.8666980920314254, "calib/step_q_w_n": 891.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2139.0, "completions/max_terminated_length": 2139.0, "completions/mean_length": 411.65625, "completions/mean_terminated_length": 414.89764404296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.8043549656867981, "learning_rate": 4.000000000000001e-06, "loss": -0.0669, "num_tokens": 4496045.0, "reward": 0.873046875, "reward_std": 0.5186734199523926, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.83203125, "step": 17 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 6.6328125, "calib/ece": 0.5076639004149379, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.87109375, "calib/frac_conf_gt_0.9": 0.91701244813278, "calib/gap": 0.004045808966861464, "calib/mean_conf": 0.9539709543568464, "calib/mu_c": 0.9562037037037037, "calib/mu_w": 0.9521578947368422, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.5067510373443985, "calib/std_conf": 0.047515395918551195, "calib/step_conf_rate": 0.9296875, "calib/step_q_c": 0.8654918032786885, "calib/step_q_c_n": 732.0, "calib/step_q_gap": 0.02443279706750834, "calib/step_q_w": 0.8410590062111801, "calib/step_q_w_n": 966.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1605.0, "completions/max_terminated_length": 1605.0, "completions/mean_length": 394.25, "completions/mean_terminated_length": 395.7961120605469, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.47119131684303284, "learning_rate": 4.25e-06, "loss": -0.0478, "num_tokens": 4707693.0, "reward": 0.857421875, "reward_std": 0.4101829528808594, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.87109375, "step": 18 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.15625, "calib/ece": 0.512344, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.92, "calib/gap": 0.009028571428571275, "calib/mean_conf": 0.9495439999999998, "calib/mu_c": 0.9545999999999998, "calib/mu_w": 0.9455714285714285, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.510944, "calib/std_conf": 0.047297527038947816, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8617517814726842, "calib/step_q_c_n": 842.0, "calib/step_q_gap": 0.01605481177571444, "calib/step_q_w": 0.8456969696969697, "calib/step_q_w_n": 990.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 801.0, "completions/max_terminated_length": 801.0, "completions/mean_length": 355.06640625, "completions/mean_terminated_length": 357.8622131347656, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.020266666666666665, "grad_norm": 0.7457324862480164, "learning_rate": 4.5e-06, "loss": -0.0078, "num_tokens": 4903350.0, "reward": 0.90234375, "reward_std": 0.35737013816833496, "rewards/accuracy_reward_step": 0.4296875, "rewards/format_reward_step": 0.9453125, "step": 19 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 7.24609375, "calib/ece": 0.48309236947791157, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9236947791164659, "calib/gap": -0.0017365967365967627, "calib/mean_conf": 0.9529718875502008, "calib/mu_c": 0.952051282051282, "calib/mu_w": 0.9537878787878787, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.48309236947791157, "calib/std_conf": 0.03786473511743426, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8567741935483871, "calib/step_q_c_n": 837.0, "calib/step_q_gap": 0.000634704353888127, "calib/step_q_w": 0.856139489194499, "calib/step_q_w_n": 1018.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1159.0, "completions/max_terminated_length": 1159.0, "completions/mean_length": 364.70703125, "completions/mean_terminated_length": 367.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.021333333333333333, "grad_norm": 0.5896674394607544, "learning_rate": 4.75e-06, "loss": -0.0232, "num_tokens": 5101587.0, "reward": 0.931640625, "reward_std": 0.35057777166366577, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.94921875, "step": 20 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 7.8984375, "calib/ece": 0.37701960784313726, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9176470588235294, "calib/gap": 0.006309538770893464, "calib/mean_conf": 0.9468235294117647, "calib/mu_c": 0.9495205479452055, "calib/mu_w": 0.943211009174312, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3756470588235294, "calib/std_conf": 0.07272637568913073, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8523810810810811, "calib/step_q_c_n": 1110.0, "calib/step_q_gap": -0.0064018136557609795, "calib/step_q_w": 0.8587828947368421, "calib/step_q_w_n": 912.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 874.0, "completions/max_terminated_length": 874.0, "completions/mean_length": 372.984375, "completions/mean_terminated_length": 375.9212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.0224, "grad_norm": 0.3262301981449127, "learning_rate": 5e-06, "loss": -0.0064, "num_tokens": 5300031.0, "reward": 1.068359375, "reward_std": 0.39928555488586426, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.99609375, "step": 21 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 8.52734375, "calib/ece": 0.3571126482213438, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9604743083003953, "calib/gap": 0.012833246482542915, "calib/mean_conf": 0.9579031620553359, "calib/mu_c": 0.9630263157894736, "calib/mu_w": 0.9501930693069307, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3571126482213438, "calib/std_conf": 0.04993861360051285, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.869948876839659, "calib/step_q_c_n": 1291.0, "calib/step_q_gap": 0.000882733341901254, "calib/step_q_w": 0.8690661434977578, "calib/step_q_w_n": 892.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 833.0, "completions/max_terminated_length": 833.0, "completions/mean_length": 390.2734375, "completions/mean_terminated_length": 393.3464660644531, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.023466666666666667, "grad_norm": 0.8765632510185242, "learning_rate": 4.9722222222222224e-06, "loss": -0.0115, "num_tokens": 5501757.0, "reward": 1.08984375, "reward_std": 0.3492136001586914, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.984375, "step": 22 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 9.171875, "calib/ece": 0.43206640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.8828125, "calib/gap": -0.017310294117647174, "calib/mean_conf": 0.94362890625, "calib/mu_c": 0.9355147058823529, "calib/mu_w": 0.952825, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42222265625, "calib/std_conf": 0.07139813947631066, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8419904076738609, "calib/step_q_c_n": 1251.0, "calib/step_q_gap": -0.021208316118299653, "calib/step_q_w": 0.8631987237921606, "calib/step_q_w_n": 1097.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1567.0, "completions/max_terminated_length": 1567.0, "completions/mean_length": 414.875, "completions/mean_terminated_length": 418.1417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.024533333333333334, "grad_norm": 0.8596509099006653, "learning_rate": 4.944444444444445e-06, "loss": -0.0007, "num_tokens": 5711901.0, "reward": 1.03125, "reward_std": 0.38387420773506165, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 1.0, "step": 23 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.671875, "calib/ece": 0.47250000000000003, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.876984126984127, "calib/gap": 0.005223352498894274, "calib/mean_conf": 0.9447222222222222, "calib/mu_c": 0.9474789915966386, "calib/mu_w": 0.9422556390977443, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.47250000000000003, "calib/std_conf": 0.05814870631282675, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8465031503150314, "calib/step_q_c_n": 1111.0, "calib/step_q_gap": 0.0033529671648482307, "calib/step_q_w": 0.8431501831501832, "calib/step_q_w_n": 1365.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 465.26171875, "completions/mean_terminated_length": 465.26171875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.0256, "grad_norm": 0.7665349841117859, "learning_rate": 4.9166666666666665e-06, "loss": 0.0118, "num_tokens": 5935520.0, "reward": 0.95703125, "reward_std": 0.39235660433769226, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.984375, "step": 24 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 9.9140625, "calib/ece": 0.3672117647058823, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8235294117647058, "calib/gap": -0.0018439110217415289, "calib/mean_conf": 0.9397607843137256, "calib/mu_c": 0.9389726027397263, "calib/mu_w": 0.9408165137614678, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3672117647058823, "calib/std_conf": 0.047098828420678765, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8269407407407406, "calib/step_q_c_n": 1350.0, "calib/step_q_gap": -0.0189683501683503, "calib/step_q_w": 0.8459090909090909, "calib/step_q_w_n": 1188.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 437.8515625, "completions/mean_terminated_length": 441.2992248535156, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.02666666666666667, "grad_norm": 0.8922926783561707, "learning_rate": 4.888888888888889e-06, "loss": 0.0266, "num_tokens": 6150834.0, "reward": 1.068359375, "reward_std": 0.30944645404815674, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.99609375, "step": 25 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 9.3046875, "calib/ece": 0.3494488188976378, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6889763779527559, "calib/gap": 0.005464231354642468, "calib/mean_conf": 0.924251968503937, "calib/mu_c": 0.9265753424657535, "calib/mu_w": 0.921111111111111, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3494488188976378, "calib/std_conf": 0.05110618381061187, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8115927873779114, "calib/step_q_c_n": 1331.0, "calib/step_q_gap": -0.00734156086186033, "calib/step_q_w": 0.8189343482397717, "calib/step_q_w_n": 1051.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 437.19921875, "completions/mean_terminated_length": 438.91375732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.027733333333333332, "grad_norm": 0.5688765048980713, "learning_rate": 4.861111111111111e-06, "loss": 0.0257, "num_tokens": 6367997.0, "reward": 1.0625, "reward_std": 0.3060663640499115, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.984375, "step": 26 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 9.58984375, "calib/ece": 0.3891764705882352, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5215686274509804, "calib/gap": 0.0022369230769230253, "calib/mean_conf": 0.8989803921568628, "calib/mu_c": 0.900076923076923, "calib/mu_w": 0.89784, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3891764705882352, "calib/std_conf": 0.06398942626993945, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7787020905923345, "calib/step_q_c_n": 1148.0, "calib/step_q_gap": 0.005649297937399433, "calib/step_q_w": 0.7730527926549351, "calib/step_q_w_n": 1307.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1641.0, "completions/max_terminated_length": 1641.0, "completions/mean_length": 456.8359375, "completions/mean_terminated_length": 460.4330749511719, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.0288, "grad_norm": 1.1152070760726929, "learning_rate": 4.833333333333333e-06, "loss": 0.0124, "num_tokens": 6590163.0, "reward": 1.005859375, "reward_std": 0.30036985874176025, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 0.99609375, "step": 27 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 10.1484375, "calib/ece": 0.21065625000000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.33203125, "calib/gap": -0.01341971207087489, "calib/mean_conf": 0.86253125, "calib/mu_c": 0.8581279069767442, "calib/mu_w": 0.8715476190476191, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20065624999999998, "calib/std_conf": 0.08976757640394163, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.734737144585602, "calib/step_q_c_n": 1653.0, "calib/step_q_gap": -0.015035342186884848, "calib/step_q_w": 0.7497724867724869, "calib/step_q_w_n": 945.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1459.0, "completions/max_terminated_length": 1459.0, "completions/mean_length": 477.328125, "completions/mean_terminated_length": 481.08660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.029866666666666666, "grad_norm": 0.5947628617286682, "learning_rate": 4.805555555555556e-06, "loss": -0.0196, "num_tokens": 6819303.0, "reward": 1.171875, "reward_std": 0.27487409114837646, "rewards/accuracy_reward_step": 0.671875, "rewards/format_reward_step": 1.0, "step": 28 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 11.08203125, "calib/ece": 0.31074803149606295, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.24803149606299213, "calib/gap": -0.0075980148883375564, "calib/mean_conf": 0.8224015748031496, "calib/mu_c": 0.8186923076923077, "calib/mu_w": 0.8262903225806453, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3106692913385826, "calib/std_conf": 0.11648789331453538, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6888409090909091, "calib/step_q_c_n": 1320.0, "calib/step_q_gap": 0.014767079163420571, "calib/step_q_w": 0.6740738299274885, "calib/step_q_w_n": 1517.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2265.0, "completions/max_terminated_length": 2265.0, "completions/mean_length": 568.7421875, "completions/mean_terminated_length": 570.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.030933333333333334, "grad_norm": 0.6354207992553711, "learning_rate": 4.777777777777778e-06, "loss": 0.0026, "num_tokens": 7072029.0, "reward": 1.00390625, "reward_std": 0.307236909866333, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 0.9921875, "step": 29 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 10.52734375, "calib/ece": 0.1993307086614174, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.16535433070866143, "calib/gap": 0.021545130035696136, "calib/mean_conf": 0.7692519685039371, "calib/mu_c": 0.7782432432432433, "calib/mu_w": 0.7566981132075472, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19295275590551186, "calib/std_conf": 0.14552319232341973, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6220078226857888, "calib/step_q_c_n": 1534.0, "calib/step_q_gap": 0.021112043185358198, "calib/step_q_w": 0.6008957795004306, "calib/step_q_w_n": 1161.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2180.0, "completions/max_terminated_length": 2180.0, "completions/mean_length": 571.359375, "completions/mean_terminated_length": 573.6000366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.032, "grad_norm": 0.6959826350212097, "learning_rate": 4.75e-06, "loss": 0.022, "num_tokens": 7325281.0, "reward": 1.07421875, "reward_std": 0.31208619475364685, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9921875, "step": 30 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 11.01953125, "calib/ece": 0.21984313725490198, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": -0.01857487922705303, "calib/mean_conf": 0.6356078431372548, "calib/mu_c": 0.6255555555555555, "calib/mu_w": 0.6441304347826086, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19831372549019613, "calib/std_conf": 0.18196833148648528, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4764291845493562, "calib/step_q_c_n": 1165.0, "calib/step_q_gap": 0.001211793245008308, "calib/step_q_w": 0.47521739130434787, "calib/step_q_w_n": 1656.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2254.0, "completions/max_terminated_length": 2254.0, "completions/mean_length": 572.30859375, "completions/mean_terminated_length": 574.552978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.03306666666666667, "grad_norm": 0.4393335282802582, "learning_rate": 4.722222222222222e-06, "loss": 0.0167, "num_tokens": 7577704.0, "reward": 0.958984375, "reward_std": 0.28661811351776123, "rewards/accuracy_reward_step": 0.4609375, "rewards/format_reward_step": 0.99609375, "step": 31 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.31640625, "calib/ece": 0.18234126984126986, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.04365079365079365, "calib/gap": -0.02955836381775856, "calib/mean_conf": 0.5640873015873015, "calib/mu_c": 0.5527096774193548, "calib/mu_w": 0.5822680412371134, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06567460317460315, "calib/std_conf": 0.17159950234143945, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.40218533886583674, "calib/step_q_c_n": 1446.0, "calib/step_q_gap": -0.01822470297516743, "calib/step_q_w": 0.42041004184100417, "calib/step_q_w_n": 1195.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2316.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 548.33984375, "completions/mean_terminated_length": 550.490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.034133333333333335, "grad_norm": 5.75811243057251, "learning_rate": 4.694444444444445e-06, "loss": 0.042, "num_tokens": 7824783.0, "reward": 1.09765625, "reward_std": 0.21498265862464905, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.984375, "step": 32 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.64453125, "calib/ece": 0.11507936507936511, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": 0.02414760914760894, "calib/mean_conf": 0.5634126984126985, "calib/mu_c": 0.5733783783783782, "calib/mu_w": 0.5492307692307693, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.045595238095238105, "calib/std_conf": 0.16762184716878084, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.42119708994709, "calib/step_q_c_n": 1512.0, "calib/step_q_gap": 0.013291071810239197, "calib/step_q_w": 0.4079060181368508, "calib/step_q_w_n": 1213.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2509.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 577.9453125, "completions/mean_terminated_length": 577.9453125, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.0352, "grad_norm": 5.477323532104492, "learning_rate": 4.666666666666667e-06, "loss": 0.0567, "num_tokens": 8079609.0, "reward": 1.0703125, "reward_std": 0.2605133056640625, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.984375, "step": 33 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.59765625, "calib/ece": 0.1350197628458498, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.01922571656050953, "calib/mean_conf": 0.5447430830039526, "calib/mu_c": 0.5520382165605096, "calib/mu_w": 0.5328125, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.029604743083003947, "calib/std_conf": 0.15592777668864602, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.40732686980609417, "calib/step_q_c_n": 1444.0, "calib/step_q_gap": 0.043407817486252165, "calib/step_q_w": 0.363919052319842, "calib/step_q_w_n": 1013.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2499.0, "completions/max_terminated_length": 2499.0, "completions/mean_length": 497.96484375, "completions/mean_terminated_length": 499.91766357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.03626666666666667, "grad_norm": 1.898915410041809, "learning_rate": 4.638888888888889e-06, "loss": 0.0402, "num_tokens": 8312200.0, "reward": 1.10546875, "reward_std": 0.35471436381340027, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.984375, "step": 34 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 10.8359375, "calib/ece": 0.11332015810276687, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.03323832463535381, "calib/mean_conf": 0.534505928853755, "calib/mu_c": 0.5468553459119496, "calib/mu_w": 0.5136170212765958, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009683794466403172, "calib/std_conf": 0.13347089275891036, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3990954773869347, "calib/step_q_c_n": 1592.0, "calib/step_q_gap": 0.009763836100978696, "calib/step_q_w": 0.389331641285956, "calib/step_q_w_n": 1182.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2322.0, "completions/max_terminated_length": 2322.0, "completions/mean_length": 615.69140625, "completions/mean_terminated_length": 615.69140625, "completions/min_length": 223.0, "completions/min_terminated_length": 223.0, "epoch": 0.037333333333333336, "grad_norm": 1.230494737625122, "learning_rate": 4.611111111111112e-06, "loss": 0.0197, "num_tokens": 8579073.0, "reward": 1.115234375, "reward_std": 0.22809851169586182, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.98828125, "step": 35 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 9.29296875, "calib/ece": 0.19019607843137257, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.006778074866310102, "calib/mean_conf": 0.5654117647058824, "calib/mu_c": 0.5672192513368984, "calib/mu_w": 0.5604411764705883, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011137254901960788, "calib/std_conf": 0.13830247845887, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.43395945140131187, "calib/step_q_c_n": 1677.0, "calib/step_q_gap": -0.015314052872192396, "calib/step_q_w": 0.44927350427350426, "calib/step_q_w_n": 702.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 497.62890625, "completions/mean_terminated_length": 499.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.0384, "grad_norm": 0.30834102630615234, "learning_rate": 4.583333333333333e-06, "loss": 0.0048, "num_tokens": 8809178.0, "reward": 1.228515625, "reward_std": 0.2734318673610687, "rewards/accuracy_reward_step": 0.73046875, "rewards/format_reward_step": 0.99609375, "step": 36 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 10.1796875, "calib/ece": 0.17707317073170734, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": -0.00423618634886247, "calib/mean_conf": 0.5060162601626017, "calib/mu_c": 0.504225352112676, "calib/mu_w": 0.5084615384615385, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05292682926829267, "calib/std_conf": 0.13435325259767472, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3797255574614065, "calib/step_q_c_n": 1166.0, "calib/step_q_gap": -0.03260777587192687, "calib/step_q_w": 0.4123333333333334, "calib/step_q_w_n": 1440.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 598.4296875, "completions/mean_terminated_length": 600.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.039466666666666664, "grad_norm": 2.395369052886963, "learning_rate": 4.555555555555556e-06, "loss": 0.0397, "num_tokens": 9069472.0, "reward": 1.03515625, "reward_std": 0.3008233904838562, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.9609375, "step": 37 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 10.0625, "calib/ece": 0.13287449392712547, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.006826923076923008, "calib/mean_conf": 0.4902024291497976, "calib/mu_c": 0.493076923076923, "calib/mu_w": 0.48625, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02206477732793522, "calib/std_conf": 0.1246385848612078, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3904625199362042, "calib/step_q_c_n": 1254.0, "calib/step_q_gap": -0.022290883997230004, "calib/step_q_w": 0.4127534039334342, "calib/step_q_w_n": 1322.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2529.0, "completions/max_terminated_length": 2529.0, "completions/mean_length": 605.62890625, "completions/mean_terminated_length": 608.0039672851562, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.04053333333333333, "grad_norm": 3.511319875717163, "learning_rate": 4.527777777777778e-06, "loss": 0.0789, "num_tokens": 9331401.0, "reward": 1.041015625, "reward_std": 0.2475888729095459, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.96484375, "step": 38 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 9.88671875, "calib/ece": 0.19262948207171324, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.033144950287807395, "calib/mean_conf": 0.4936653386454184, "calib/mu_c": 0.4799319727891157, "calib/mu_w": 0.5130769230769231, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05031872509960161, "calib/std_conf": 0.1339386178083945, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3815764705882353, "calib/step_q_c_n": 1275.0, "calib/step_q_gap": -0.0407005994754589, "calib/step_q_w": 0.4222770700636942, "calib/step_q_w_n": 1256.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 606.2890625, "completions/mean_terminated_length": 608.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.0416, "grad_norm": 30.61140251159668, "learning_rate": 4.5e-06, "loss": 0.0292, "num_tokens": 9592699.0, "reward": 1.064453125, "reward_std": 0.2312648594379425, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.98046875, "step": 39 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 10.171875, "calib/ece": 0.12492125984251967, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.005425373134328337, "calib/mean_conf": 0.5126377952755905, "calib/mu_c": 0.5100746268656716, "calib/mu_w": 0.5155, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.055, "calib/std_conf": 0.11670227070987084, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4015594405594406, "calib/step_q_c_n": 1430.0, "calib/step_q_gap": -0.003057254500184625, "calib/step_q_w": 0.4046166950596252, "calib/step_q_w_n": 1174.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 622.18359375, "completions/mean_terminated_length": 622.18359375, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.042666666666666665, "grad_norm": 3.3288333415985107, "learning_rate": 4.472222222222223e-06, "loss": 0.0055, "num_tokens": 9858738.0, "reward": 1.01953125, "reward_std": 0.3156605362892151, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.9921875, "step": 40 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 9.5390625, "calib/ece": 0.299251968503937, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.015975111996017755, "calib/mean_conf": 0.520984251968504, "calib/mu_c": 0.5179024390243904, "calib/mu_w": 0.5338775510204081, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006574803149606293, "calib/std_conf": 0.10996874890736674, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4183542234332425, "calib/step_q_c_n": 1835.0, "calib/step_q_gap": -0.0306737831565434, "calib/step_q_w": 0.4490280065897859, "calib/step_q_w_n": 607.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 538.73828125, "completions/mean_terminated_length": 540.8510131835938, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.04373333333333333, "grad_norm": 1.267921805381775, "learning_rate": 4.444444444444444e-06, "loss": 0.0312, "num_tokens": 10103903.0, "reward": 1.296875, "reward_std": 0.20018117129802704, "rewards/accuracy_reward_step": 0.80078125, "rewards/format_reward_step": 0.9921875, "step": 41 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.76953125, "calib/ece": 0.13515748031496067, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.012499834491890072, "calib/mean_conf": 0.5383858267716536, "calib/mu_c": 0.5337106918238993, "calib/mu_w": 0.5462105263157894, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.023779527559055134, "calib/std_conf": 0.09652705934061376, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.44076308139534887, "calib/step_q_c_n": 1376.0, "calib/step_q_gap": -0.036532660837102215, "calib/step_q_w": 0.4772957422324511, "calib/step_q_w_n": 869.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 483.89453125, "completions/mean_terminated_length": 485.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.0448, "grad_norm": 1.4127931594848633, "learning_rate": 4.416666666666667e-06, "loss": 0.0278, "num_tokens": 10332148.0, "reward": 1.1171875, "reward_std": 0.22029992938041687, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.9921875, "step": 42 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 9.0703125, "calib/ece": 0.19654901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.007513745946708117, "calib/mean_conf": 0.5039999999999999, "calib/mu_c": 0.5064161849710983, "calib/mu_w": 0.4989024390243902, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011058823529411763, "calib/std_conf": 0.11128694583699156, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4124197860962567, "calib/step_q_c_n": 1496.0, "calib/step_q_gap": 0.003594120236692422, "calib/step_q_w": 0.40882566585956426, "calib/step_q_w_n": 826.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2421.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 546.68359375, "completions/mean_terminated_length": 550.9881591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.04586666666666667, "grad_norm": 10.022756576538086, "learning_rate": 4.388888888888889e-06, "loss": 0.0455, "num_tokens": 10577323.0, "reward": 1.173828125, "reward_std": 0.2660510838031769, "rewards/accuracy_reward_step": 0.67578125, "rewards/format_reward_step": 0.99609375, "step": 43 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.40625, "calib/ece": 0.14528, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015833333333333366, "calib/mean_conf": 0.4828, "calib/mu_c": 0.47646666666666665, "calib/mu_w": 0.4923, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014040000000000009, "calib/std_conf": 0.1054900943216945, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.36952823920265787, "calib/step_q_c_n": 1505.0, "calib/step_q_gap": -0.03165381429173386, "calib/step_q_w": 0.40118205349439173, "calib/step_q_w_n": 1159.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2506.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 639.3671875, "completions/mean_terminated_length": 641.8745727539062, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.046933333333333334, "grad_norm": 0.5156874656677246, "learning_rate": 4.361111111111112e-06, "loss": 0.0377, "num_tokens": 10847321.0, "reward": 1.07421875, "reward_std": 0.21972496807575226, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.9765625, "step": 44 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 9.48046875, "calib/ece": 0.20632812500000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017586580086579984, "calib/mean_conf": 0.454609375, "calib/mu_c": 0.4606547619047619, "calib/mu_w": 0.4430681818181819, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0023437500000000003, "calib/std_conf": 0.10552993017201033, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3628175797712221, "calib/step_q_c_n": 1661.0, "calib/step_q_gap": -0.0009291565212061204, "calib/step_q_w": 0.3637467362924282, "calib/step_q_w_n": 766.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 534.0234375, "completions/mean_terminated_length": 538.2283325195312, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.048, "grad_norm": 0.6220444440841675, "learning_rate": 4.333333333333334e-06, "loss": 0.0041, "num_tokens": 11089079.0, "reward": 1.15625, "reward_std": 0.25460314750671387, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 1.0, "step": 45 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 9.71484375, "calib/ece": 0.21824000000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.0035470795401687516, "calib/mean_conf": 0.40152, "calib/mu_c": 0.4029530201342282, "calib/mu_w": 0.3994059405940594, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011880000000000002, "calib/std_conf": 0.10389268309173653, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31864343958487773, "calib/step_q_c_n": 1349.0, "calib/step_q_gap": -0.04035480294587801, "calib/step_q_w": 0.35899824253075574, "calib/step_q_w_n": 1138.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 616.13671875, "completions/mean_terminated_length": 616.13671875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.04906666666666667, "grad_norm": 0.6727369427680969, "learning_rate": 4.305555555555556e-06, "loss": 0.0382, "num_tokens": 11351578.0, "reward": 1.0703125, "reward_std": 0.2612079977989197, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.9765625, "step": 46 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 9.99609375, "calib/ece": 0.3606882591093118, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0037929717341482205, "calib/mean_conf": 0.3420647773279353, "calib/mu_c": 0.34088235294117647, "calib/mu_w": 0.3446753246753247, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007246963562753035, "calib/std_conf": 0.11379445500608808, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25505913272010516, "calib/step_q_c_n": 1522.0, "calib/step_q_gap": -0.018113480587512942, "calib/step_q_w": 0.2731726133076181, "calib/step_q_w_n": 1037.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2524.0, "completions/max_terminated_length": 2524.0, "completions/mean_length": 641.0078125, "completions/mean_terminated_length": 646.0551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.050133333333333335, "grad_norm": 0.40471020340919495, "learning_rate": 4.277777777777778e-06, "loss": 0.0425, "num_tokens": 11621652.0, "reward": 1.146484375, "reward_std": 0.12448950111865997, "rewards/accuracy_reward_step": 0.6640625, "rewards/format_reward_step": 0.96484375, "step": 47 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.5859375, "calib/ece": 0.29771653543307086, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.040253488842445895, "calib/mean_conf": 0.3725196850393701, "calib/mu_c": 0.35809815950920243, "calib/mu_w": 0.3983516483516483, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014251968503937012, "calib/std_conf": 0.12516426060255484, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26928518242740135, "calib/step_q_c_n": 1343.0, "calib/step_q_gap": -0.053159262017043185, "calib/step_q_w": 0.32244444444444453, "calib/step_q_w_n": 855.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 560.6015625, "completions/mean_terminated_length": 560.6015625, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.0512, "grad_norm": 0.576022744178772, "learning_rate": 4.25e-06, "loss": -0.0073, "num_tokens": 11868854.0, "reward": 1.1328125, "reward_std": 0.27789777517318726, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.9921875, "step": 48 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 9.2109375, "calib/ece": 0.3026086956521739, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.0026007326007325693, "calib/mean_conf": 0.40205533596837945, "calib/mu_c": 0.40285714285714286, "calib/mu_w": 0.4002564102564103, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006482213438735177, "calib/std_conf": 0.1381774540062094, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33195707070707065, "calib/step_q_c_n": 1584.0, "calib/step_q_gap": 0.018249060371153314, "calib/step_q_w": 0.31370801033591733, "calib/step_q_w_n": 774.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2518.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 557.3828125, "completions/mean_terminated_length": 559.5686645507812, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.05226666666666667, "grad_norm": 0.3916730582714081, "learning_rate": 4.222222222222223e-06, "loss": 0.0563, "num_tokens": 12116080.0, "reward": 1.177734375, "reward_std": 0.2767038941383362, "rewards/accuracy_reward_step": 0.68359375, "rewards/format_reward_step": 0.98828125, "step": 49 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 10.34765625, "calib/ece": 0.3345454545454546, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.01421337897589492, "calib/mean_conf": 0.35667984189723323, "calib/mu_c": 0.36128654970760227, "calib/mu_w": 0.34707317073170735, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007667984189723321, "calib/std_conf": 0.15815651260389624, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27458479532163743, "calib/step_q_c_n": 1710.0, "calib/step_q_gap": -0.02663991181361286, "calib/step_q_w": 0.3012247071352503, "calib/step_q_w_n": 939.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 591.53515625, "completions/mean_terminated_length": 593.8549194335938, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.05333333333333334, "grad_norm": 0.35859617590904236, "learning_rate": 4.194444444444445e-06, "loss": 0.0395, "num_tokens": 12372873.0, "reward": 1.162109375, "reward_std": 0.17702001333236694, "rewards/accuracy_reward_step": 0.66796875, "rewards/format_reward_step": 0.98828125, "step": 50 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 10.19921875, "calib/ece": 0.4156299212598425, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": -0.022006750311835077, "calib/mean_conf": 0.30271653543307087, "calib/mu_c": 0.296045197740113, "calib/mu_w": 0.3180519480519481, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010748031496062994, "calib/std_conf": 0.1506184833340684, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21695022054190297, "calib/step_q_c_n": 1587.0, "calib/step_q_gap": -0.02467087320809705, "calib/step_q_w": 0.24162109375000002, "calib/step_q_w_n": 1024.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2531.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 612.70703125, "completions/mean_terminated_length": 615.10986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.0544, "grad_norm": 0.27484986186027527, "learning_rate": 4.166666666666667e-06, "loss": 0.03, "num_tokens": 12639022.0, "reward": 1.185546875, "reward_std": 0.27396029233932495, "rewards/accuracy_reward_step": 0.69140625, "rewards/format_reward_step": 0.98828125, "step": 51 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.3515625, "calib/ece": 0.5295238095238094, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.014234817813765177, "calib/mean_conf": 0.25785714285714284, "calib/mu_c": 0.26107692307692304, "calib/mu_w": 0.24684210526315786, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0067857142857142855, "calib/std_conf": 0.14199881031726735, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17987912087912086, "calib/step_q_c_n": 1820.0, "calib/step_q_gap": -0.00870972929509517, "calib/step_q_w": 0.18858885017421603, "calib/step_q_w_n": 574.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 552.58203125, "completions/mean_terminated_length": 554.7490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.055466666666666664, "grad_norm": 0.31955885887145996, "learning_rate": 4.138888888888889e-06, "loss": 0.0232, "num_tokens": 12888435.0, "reward": 1.25390625, "reward_std": 0.18764840066432953, "rewards/accuracy_reward_step": 0.76171875, "rewards/format_reward_step": 0.984375, "step": 52 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 10.02734375, "calib/ece": 0.4562890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.02520731612803223, "calib/mean_conf": 0.2816015625, "calib/mu_c": 0.2883957219251337, "calib/mu_w": 0.26318840579710145, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0037109375, "calib/std_conf": 0.13509897065691728, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2138771071234367, "calib/step_q_c_n": 1839.0, "calib/step_q_gap": 0.017352381848711435, "calib/step_q_w": 0.19652472527472525, "calib/step_q_w_n": 728.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1384.0, "completions/max_terminated_length": 1384.0, "completions/mean_length": 582.15234375, "completions/mean_terminated_length": 586.7362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.05653333333333333, "grad_norm": 0.3678324520587921, "learning_rate": 4.111111111111111e-06, "loss": 0.0045, "num_tokens": 13143290.0, "reward": 1.228515625, "reward_std": 0.23388169705867767, "rewards/accuracy_reward_step": 0.73046875, "rewards/format_reward_step": 0.99609375, "step": 53 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 8.15234375, "calib/ece": 0.4665748031496063, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.0041525810872544144, "calib/mean_conf": 0.3168897637795276, "calib/mu_c": 0.31778894472361807, "calib/mu_w": 0.31363636363636366, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.1755962050217898, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.240630068621335, "calib/step_q_c_n": 1603.0, "calib/step_q_gap": -0.00860546856874761, "calib/step_q_w": 0.24923553719008262, "calib/step_q_w_n": 484.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2513.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 522.5, "completions/mean_terminated_length": 522.5, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.0576, "grad_norm": 0.469553679227829, "learning_rate": 4.083333333333334e-06, "loss": 0.0049, "num_tokens": 13383282.0, "reward": 1.271484375, "reward_std": 0.18993809819221497, "rewards/accuracy_reward_step": 0.77734375, "rewards/format_reward_step": 0.98828125, "step": 54 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.60546875, "calib/ece": 0.3109523809523809, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.030600598603259088, "calib/mean_conf": 0.3242857142857143, "calib/mu_c": 0.33606451612903226, "calib/mu_w": 0.3054639175257732, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010079365079365077, "calib/std_conf": 0.1646951419924655, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27128185907046476, "calib/step_q_c_n": 1334.0, "calib/step_q_gap": 0.014295192403798063, "calib/step_q_w": 0.2569866666666667, "calib/step_q_w_n": 1125.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2531.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 572.7109375, "completions/mean_terminated_length": 572.7109375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.058666666666666666, "grad_norm": 0.22412878274917603, "learning_rate": 4.055555555555556e-06, "loss": 0.0107, "num_tokens": 13637720.0, "reward": 1.09765625, "reward_std": 0.27271413803100586, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.984375, "step": 55 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 9.125, "calib/ece": 0.27885826771653544, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00040517534899559005, "calib/mean_conf": 0.37074803149606295, "calib/mu_c": 0.37060606060606055, "calib/mu_w": 0.37101123595505614, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.15235626857751916, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2981626928471248, "calib/step_q_c_n": 1426.0, "calib/step_q_gap": -0.017123021438589514, "calib/step_q_w": 0.31528571428571434, "calib/step_q_w_n": 910.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2715.0, "completions/max_terminated_length": 2715.0, "completions/mean_length": 555.58984375, "completions/mean_terminated_length": 557.7686767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.05973333333333333, "grad_norm": 0.5936625003814697, "learning_rate": 4.027777777777779e-06, "loss": 0.0123, "num_tokens": 13886791.0, "reward": 1.140625, "reward_std": 0.2601584792137146, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.9921875, "step": 56 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 9.5703125, "calib/ece": 0.43661417322834645, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.020655241935483903, "calib/mean_conf": 0.3246456692913386, "calib/mu_c": 0.32968749999999997, "calib/mu_w": 0.30903225806451606, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0026771653543307085, "calib/std_conf": 0.1510295130755166, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2760599208592425, "calib/step_q_c_n": 1769.0, "calib/step_q_gap": 0.03532570646864047, "calib/step_q_w": 0.24073421439060205, "calib/step_q_w_n": 681.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2506.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 571.30859375, "completions/mean_terminated_length": 571.30859375, "completions/min_length": 201.0, "completions/min_terminated_length": 201.0, "epoch": 0.0608, "grad_norm": 0.3216277062892914, "learning_rate": 4.000000000000001e-06, "loss": 0.0214, "num_tokens": 14139838.0, "reward": 1.24609375, "reward_std": 0.2393544316291809, "rewards/accuracy_reward_step": 0.75, "rewards/format_reward_step": 0.9921875, "step": 57 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 9.72265625, "calib/ece": 0.39502008032128516, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.017761857707509876, "calib/mean_conf": 0.2745381526104418, "calib/mu_c": 0.26826086956521744, "calib/mu_w": 0.2860227272727273, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.011485943775100402, "calib/std_conf": 0.1462183530795982, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.1948092105263158, "calib/step_q_c_n": 1520.0, "calib/step_q_gap": -0.02595446336429308, "calib/step_q_w": 0.22076367389060889, "calib/step_q_w_n": 969.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 662.15234375, "completions/mean_terminated_length": 662.15234375, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.06186666666666667, "grad_norm": 0.30427154898643494, "learning_rate": 3.972222222222223e-06, "loss": 0.03, "num_tokens": 14415669.0, "reward": 1.11328125, "reward_std": 0.3100544214248657, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.96875, "step": 58 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 8.84375, "calib/ece": 0.4424007936507937, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.022777144079275558, "calib/mean_conf": 0.25617063492063497, "calib/mu_c": 0.24866863905325448, "calib/mu_w": 0.27144578313253004, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01396825396825397, "calib/std_conf": 0.1438725966205067, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.18537381916329285, "calib/step_q_c_n": 1482.0, "calib/step_q_gap": -0.03044459515895781, "calib/step_q_w": 0.21581841432225066, "calib/step_q_w_n": 782.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2516.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 588.91015625, "completions/mean_terminated_length": 588.91015625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.06293333333333333, "grad_norm": 0.2894952595233917, "learning_rate": 3.944444444444445e-06, "loss": 0.0276, "num_tokens": 14672678.0, "reward": 1.150390625, "reward_std": 0.2913172245025635, "rewards/accuracy_reward_step": 0.66015625, "rewards/format_reward_step": 0.98046875, "step": 59 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.9296875, "calib/ece": 0.4264, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.03491341256366723, "calib/mean_conf": 0.22688, "calib/mu_c": 0.21361290322580645, "calib/mu_w": 0.24852631578947368, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.016640000000000002, "calib/std_conf": 0.13409200423589768, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.14738325281803544, "calib/step_q_c_n": 1242.0, "calib/step_q_gap": -0.033248931090010564, "calib/step_q_w": 0.180632183908046, "calib/step_q_w_n": 1044.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 568.3046875, "completions/mean_terminated_length": 572.779541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.064, "grad_norm": 0.6230308413505554, "learning_rate": 3.916666666666667e-06, "loss": 0.0147, "num_tokens": 14927020.0, "reward": 1.09375, "reward_std": 0.26803910732269287, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.9765625, "step": 60 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 9.0859375, "calib/ece": 0.5171259842519687, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00935286935286933, "calib/mean_conf": 0.19940944881889766, "calib/mu_c": 0.19675824175824178, "calib/mu_w": 0.2061111111111111, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.08557842600406948, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.14423172242874843, "calib/step_q_c_n": 1614.0, "calib/step_q_gap": -0.007060412402712241, "calib/step_q_w": 0.15129213483146067, "calib/step_q_w_n": 712.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2509.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 518.34375, "completions/mean_terminated_length": 522.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.06506666666666666, "grad_norm": 0.5418161749839783, "learning_rate": 3.88888888888889e-06, "loss": -0.0156, "num_tokens": 15163780.0, "reward": 1.20703125, "reward_std": 0.16414855420589447, "rewards/accuracy_reward_step": 0.7109375, "rewards/format_reward_step": 0.9921875, "step": 61 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.8828125, "calib/ece": 0.4805952380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01922619047619048, "calib/mean_conf": 0.19043650793650793, "calib/mu_c": 0.1968452380952381, "calib/mu_w": 0.17761904761904762, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0021825396825396826, "calib/std_conf": 0.09542411561955262, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.12830283353010627, "calib/step_q_c_n": 1694.0, "calib/step_q_gap": -0.028620243392970673, "calib/step_q_w": 0.15692307692307694, "calib/step_q_w_n": 1092.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2529.0, "completions/max_terminated_length": 2529.0, "completions/mean_length": 628.1875, "completions/mean_terminated_length": 633.1338500976562, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.06613333333333334, "grad_norm": 0.34490272402763367, "learning_rate": 3.861111111111112e-06, "loss": 0.0182, "num_tokens": 15431676.0, "reward": 1.1484375, "reward_std": 0.3012402057647705, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.984375, "step": 62 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.375, "calib/ece": 0.5203571428571427, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0023104002448917083, "calib/mean_conf": 0.1987698412698413, "calib/mu_c": 0.19810055865921788, "calib/mu_w": 0.2004109589041096, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004404761904761904, "calib/std_conf": 0.06918226016976482, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.14131530424093425, "calib/step_q_c_n": 1627.0, "calib/step_q_gap": -0.03347575503992095, "calib/step_q_w": 0.1747910592808552, "calib/step_q_w_n": 1029.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2505.0, "completions/max_terminated_length": 2505.0, "completions/mean_length": 668.8046875, "completions/mean_terminated_length": 674.0708618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.0672, "grad_norm": 0.457501083612442, "learning_rate": 3.833333333333334e-06, "loss": 0.0236, "num_tokens": 15711530.0, "reward": 1.1875, "reward_std": 0.19891968369483948, "rewards/accuracy_reward_step": 0.69921875, "rewards/format_reward_step": 0.9765625, "step": 63 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 9.96875, "calib/ece": 0.5169322709163348, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.011759080681452844, "calib/mean_conf": 0.22657370517928288, "calib/mu_c": 0.22338797814207656, "calib/mu_w": 0.2351470588235294, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007211155378486055, "calib/std_conf": 0.09112080739138619, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17443543717429066, "calib/step_q_c_n": 1727.0, "calib/step_q_gap": -0.05732213858328508, "calib/step_q_w": 0.23175757575757575, "calib/step_q_w_n": 825.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2508.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 547.78515625, "completions/mean_terminated_length": 554.2806396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.06826666666666667, "grad_norm": 0.4879423975944519, "learning_rate": 3.8055555555555556e-06, "loss": -0.0166, "num_tokens": 15955539.0, "reward": 1.205078125, "reward_std": 0.19858914613723755, "rewards/accuracy_reward_step": 0.71484375, "rewards/format_reward_step": 0.98046875, "step": 64 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.99609375, "calib/ece": 0.41180392156862755, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0021226602947032602, "calib/mean_conf": 0.22349019607843137, "calib/mu_c": 0.22271604938271605, "calib/mu_w": 0.2248387096774193, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.062490586757486026, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.1703058994901675, "calib/step_q_c_n": 1373.0, "calib/step_q_gap": -0.005059691907681962, "calib/step_q_w": 0.17536559139784946, "calib/step_q_w_n": 930.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1615.0, "completions/max_terminated_length": 1615.0, "completions/mean_length": 524.0390625, "completions/mean_terminated_length": 528.1653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.06933333333333333, "grad_norm": 0.32161787152290344, "learning_rate": 3.777777777777778e-06, "loss": -0.0061, "num_tokens": 16194717.0, "reward": 1.130859375, "reward_std": 0.13571283221244812, "rewards/accuracy_reward_step": 0.6328125, "rewards/format_reward_step": 0.99609375, "step": 65 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 12.6796875, "calib/ece": 0.2913524590163934, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016152597402597335, "calib/mean_conf": 0.2496311475409836, "calib/mu_c": 0.2570454545454545, "calib/mu_w": 0.2408928571428572, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06874956939592984, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.19214477211796246, "calib/step_q_c_n": 1492.0, "calib/step_q_gap": -0.03424063267109115, "calib/step_q_w": 0.2263854047890536, "calib/step_q_w_n": 1754.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2557.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 726.1171875, "completions/mean_terminated_length": 737.6428833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.0704, "grad_norm": 2.085747480392456, "learning_rate": 3.7500000000000005e-06, "loss": 0.0175, "num_tokens": 16486955.0, "reward": 0.9921875, "reward_std": 0.2367658019065857, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.953125, "step": 66 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 11.07421875, "calib/ece": 0.3971259842519685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0179152823920265, "calib/mean_conf": 0.2642913385826772, "calib/mu_c": 0.27035714285714285, "calib/mu_w": 0.25244186046511635, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.08813068412326645, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.19979757085020244, "calib/step_q_c_n": 1729.0, "calib/step_q_gap": 0.009191784231757594, "calib/step_q_w": 0.19060578661844485, "calib/step_q_w_n": 1106.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2467.0, "completions/max_terminated_length": 2467.0, "completions/mean_length": 672.140625, "completions/mean_terminated_length": 677.4330444335938, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.07146666666666666, "grad_norm": 0.5508794784545898, "learning_rate": 3.7222222222222225e-06, "loss": 0.0023, "num_tokens": 16764031.0, "reward": 1.15234375, "reward_std": 0.16439500451087952, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.9921875, "step": 67 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 11.3203125, "calib/ece": 0.28082677165354336, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014592363261093921, "calib/mean_conf": 0.331771653543307, "calib/mu_c": 0.33763157894736845, "calib/mu_w": 0.32303921568627453, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0070866141732283455, "calib/std_conf": 0.13205973886673128, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26028397565922917, "calib/step_q_c_n": 1479.0, "calib/step_q_gap": 0.020410825553520917, "calib/step_q_w": 0.23987315010570825, "calib/step_q_w_n": 1419.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2425.0, "completions/max_terminated_length": 2425.0, "completions/mean_length": 642.328125, "completions/mean_terminated_length": 647.3858032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.07253333333333334, "grad_norm": 0.3224004805088043, "learning_rate": 3.694444444444445e-06, "loss": -0.0131, "num_tokens": 17032555.0, "reward": 1.08984375, "reward_std": 0.16255776584148407, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 11.8828125, "calib/ece": 0.2305882352941176, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.037564408696745044, "calib/mean_conf": 0.4050980392156863, "calib/mu_c": 0.3890410958904109, "calib/mu_w": 0.42660550458715596, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.031568627450980366, "calib/std_conf": 0.14982044873414452, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33201261829652995, "calib/step_q_c_n": 1585.0, "calib/step_q_gap": -0.028014835375398617, "calib/step_q_w": 0.36002745367192857, "calib/step_q_w_n": 1457.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 745.046875, "completions/mean_terminated_length": 747.9686889648438, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.0736, "grad_norm": 0.31790482997894287, "learning_rate": 3.6666666666666666e-06, "loss": -0.0037, "num_tokens": 17327783.0, "reward": 1.068359375, "reward_std": 0.2722419202327728, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.99609375, "step": 69 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 12.109375, "calib/ece": 0.1654032258064516, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012714486638537248, "calib/mean_conf": 0.5303225806451614, "calib/mu_c": 0.5349367088607595, "calib/mu_w": 0.5222222222222223, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.029314516129032256, "calib/std_conf": 0.1149118560088766, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.464349617422013, "calib/step_q_c_n": 1699.0, "calib/step_q_gap": 0.023021994295674697, "calib/step_q_w": 0.4413276231263383, "calib/step_q_w_n": 1401.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2541.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 732.96484375, "completions/mean_terminated_length": 744.5992431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.07466666666666667, "grad_norm": 0.7387703657150269, "learning_rate": 3.638888888888889e-06, "loss": -0.0024, "num_tokens": 17622414.0, "reward": 1.1015625, "reward_std": 0.17702125012874603, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.96875, "step": 70 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 11.98046875, "calib/ece": 0.09847656249999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": 0.0018395275136847689, "calib/mean_conf": 0.6123046875, "calib/mu_c": 0.6128651685393258, "calib/mu_w": 0.611025641025641, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0077343749999999965, "calib/std_conf": 0.0873601613467337, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5387731481481481, "calib/step_q_c_n": 2160.0, "calib/step_q_gap": -0.0059126291396136565, "calib/step_q_w": 0.5446857772877618, "calib/step_q_w_n": 907.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2022.0, "completions/max_terminated_length": 2022.0, "completions/mean_length": 699.64453125, "completions/mean_terminated_length": 705.153564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.07573333333333333, "grad_norm": 0.29512184858322144, "learning_rate": 3.6111111111111115e-06, "loss": 0.0128, "num_tokens": 17905931.0, "reward": 1.1953125, "reward_std": 0.24605843424797058, "rewards/accuracy_reward_step": 0.6953125, "rewards/format_reward_step": 1.0, "step": 71 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 11.57421875, "calib/ece": 0.09527559055118104, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009160904255319013, "calib/mean_conf": 0.6379527559055118, "calib/mu_c": 0.6345625, "calib/mu_w": 0.643723404255319, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05165354330708659, "calib/std_conf": 0.05090269523481802, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5643253747917824, "calib/step_q_c_n": 1801.0, "calib/step_q_gap": -0.020614384244362238, "calib/step_q_w": 0.5849397590361446, "calib/step_q_w_n": 1162.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1308.0, "completions/max_terminated_length": 1308.0, "completions/mean_length": 616.13671875, "completions/mean_terminated_length": 620.9881591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.0768, "grad_norm": 0.22814252972602844, "learning_rate": 3.5833333333333335e-06, "loss": -0.0222, "num_tokens": 18168070.0, "reward": 1.12109375, "reward_std": 0.10495803505182266, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.9921875, "step": 72 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 10.890625, "calib/ece": 0.1998823529411764, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.003478260869565153, "calib/mean_conf": 0.6293725490196079, "calib/mu_c": 0.6299999999999999, "calib/mu_w": 0.6265217391304347, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004823529411764709, "calib/std_conf": 0.048462623877688904, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5721419009370817, "calib/step_q_c_n": 2241.0, "calib/step_q_gap": -0.01029466213421626, "calib/step_q_w": 0.582436563071298, "calib/step_q_w_n": 547.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1722.0, "completions/max_terminated_length": 1722.0, "completions/mean_length": 594.7109375, "completions/mean_terminated_length": 599.3936767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.07786666666666667, "grad_norm": 0.34402838349342346, "learning_rate": 3.555555555555556e-06, "loss": -0.0233, "num_tokens": 18427348.0, "reward": 1.314453125, "reward_std": 0.2204178273677826, "rewards/accuracy_reward_step": 0.81640625, "rewards/format_reward_step": 0.99609375, "step": 73 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 12.546875, "calib/ece": 0.1037549407114625, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.004371177370030677, "calib/mean_conf": 0.6729249011857708, "calib/mu_c": 0.6710416666666666, "calib/mu_w": 0.6754128440366973, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1037549407114625, "calib/std_conf": 0.07039233848004939, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.606, "calib/step_q_c_n": 1625.0, "calib/step_q_gap": -0.00197101449275372, "calib/step_q_w": 0.6079710144927537, "calib/step_q_w_n": 1587.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 705.6328125, "completions/mean_terminated_length": 708.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.07893333333333333, "grad_norm": 0.5026135444641113, "learning_rate": 3.5277777777777784e-06, "loss": 0.012, "num_tokens": 18711918.0, "reward": 1.056640625, "reward_std": 0.23151721060276031, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.98828125, "step": 74 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 10.28125, "calib/ece": 0.14257812499999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": -0.009701720452000573, "calib/mean_conf": 0.7010156249999999, "calib/mu_c": 0.6992344497607654, "calib/mu_w": 0.708936170212766, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013593750000000007, "calib/std_conf": 0.061030727964357226, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6336624513618677, "calib/step_q_c_n": 2056.0, "calib/step_q_gap": -0.012257687527021055, "calib/step_q_w": 0.6459201388888888, "calib/step_q_w_n": 576.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1830.0, "completions/max_terminated_length": 1830.0, "completions/mean_length": 599.86328125, "completions/mean_terminated_length": 604.5866088867188, "completions/min_length": 0.0, "completions/min_terminated_length": 234.0, "epoch": 0.08, "grad_norm": 0.38530319929122925, "learning_rate": 3.5e-06, "loss": 0.0033, "num_tokens": 18970235.0, "reward": 1.31640625, "reward_std": 0.152816042304039, "rewards/accuracy_reward_step": 0.81640625, "rewards/format_reward_step": 1.0, "step": 75 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 11.44140625, "calib/ece": 0.06450988142292488, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.012663003663003769, "calib/mean_conf": 0.7305889328063241, "calib/mu_c": 0.7263846153846153, "calib/mu_w": 0.7390476190476191, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06355731225296442, "calib/std_conf": 0.05823786745392095, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6648797327394208, "calib/step_q_c_n": 1796.0, "calib/step_q_gap": -0.020451246960490943, "calib/step_q_w": 0.6853309796999117, "calib/step_q_w_n": 1133.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2525.0, "completions/max_terminated_length": 2525.0, "completions/mean_length": 653.90625, "completions/mean_terminated_length": 659.0551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.08106666666666666, "grad_norm": 0.2749916911125183, "learning_rate": 3.4722222222222224e-06, "loss": -0.0397, "num_tokens": 19240691.0, "reward": 1.154296875, "reward_std": 0.14932094514369965, "rewards/accuracy_reward_step": 0.66015625, "rewards/format_reward_step": 0.98828125, "step": 76 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 11.51953125, "calib/ece": 0.12937999999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.028, "calib/gap": -0.01767592592592604, "calib/mean_conf": 0.7310679999999999, "calib/mu_c": 0.72725, "calib/mu_w": 0.744925925925926, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03822399999999998, "calib/std_conf": 0.07652042456756235, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.676183522446966, "calib/step_q_c_n": 2027.0, "calib/step_q_gap": -0.03730508926669995, "calib/step_q_w": 0.713488611713666, "calib/step_q_w_n": 922.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 622.73828125, "completions/mean_terminated_length": 632.623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.08213333333333334, "grad_norm": 0.3323403000831604, "learning_rate": 3.444444444444445e-06, "loss": -0.0161, "num_tokens": 19504776.0, "reward": 1.25390625, "reward_std": 0.2412024885416031, "rewards/accuracy_reward_step": 0.765625, "rewards/format_reward_step": 0.9765625, "step": 77 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 12.43359375, "calib/ece": 0.11027450980392149, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": -0.004929647906108503, "calib/mean_conf": 0.7494901960784314, "calib/mu_c": 0.7477116564417177, "calib/mu_w": 0.7526413043478262, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11027450980392149, "calib/std_conf": 0.059183255630460015, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6824272493573265, "calib/step_q_c_n": 1945.0, "calib/step_q_gap": -0.016760957427810808, "calib/step_q_w": 0.6991882067851373, "calib/step_q_w_n": 1238.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2519.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 737.19140625, "completions/mean_terminated_length": 740.0823974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 246.0, "epoch": 0.0832, "grad_norm": 0.3206815719604492, "learning_rate": 3.416666666666667e-06, "loss": 0.0029, "num_tokens": 19801521.0, "reward": 1.134765625, "reward_std": 0.1259935051202774, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.99609375, "step": 78 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 11.1171875, "calib/ece": 0.054714062499999966, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0234375, "calib/gap": 0.006358829253686582, "calib/mean_conf": 0.7532140625, "calib/mu_c": 0.7550273224043716, "calib/mu_w": 0.7486684931506851, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0465421875, "calib/std_conf": 0.06678973856249247, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7061187961985216, "calib/step_q_c_n": 1894.0, "calib/step_q_gap": 0.014124258383395483, "calib/step_q_w": 0.6919945378151261, "calib/step_q_w_n": 952.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1911.0, "completions/max_terminated_length": 1911.0, "completions/mean_length": 710.62890625, "completions/mean_terminated_length": 716.2244262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.08426666666666667, "grad_norm": 0.2801198661327362, "learning_rate": 3.3888888888888893e-06, "loss": -0.0243, "num_tokens": 20089818.0, "reward": 1.21484375, "reward_std": 0.1825442910194397, "rewards/accuracy_reward_step": 0.71484375, "rewards/format_reward_step": 1.0, "step": 79 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 10.17578125, "calib/ece": 0.06495703125000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00034867860187548505, "calib/mean_conf": 0.7440351562499999, "calib/mu_c": 0.7439411764705883, "calib/mu_w": 0.7442898550724638, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03926171874999998, "calib/std_conf": 0.057322643717714956, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6887900410677618, "calib/step_q_c_n": 1948.0, "calib/step_q_gap": -0.003767036557808967, "calib/step_q_w": 0.6925570776255707, "calib/step_q_w_n": 657.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1356.0, "completions/max_terminated_length": 1356.0, "completions/mean_length": 559.10546875, "completions/mean_terminated_length": 563.5078735351562, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.08533333333333333, "grad_norm": 0.3234640657901764, "learning_rate": 3.3611111111111117e-06, "loss": -0.0066, "num_tokens": 20335109.0, "reward": 1.23046875, "reward_std": 0.16728198528289795, "rewards/accuracy_reward_step": 0.73046875, "rewards/format_reward_step": 1.0, "step": 80 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 10.671875, "calib/ece": 0.06084143426294811, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": -0.01690155610155608, "calib/mean_conf": 0.7262820717131473, "calib/mu_c": 0.7218378378378378, "calib/mu_w": 0.7387393939393939, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.025035856573705183, "calib/std_conf": 0.055329716852905955, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6718737238044062, "calib/step_q_c_n": 1861.0, "calib/step_q_gap": -0.018236035093412206, "calib/step_q_w": 0.6901097588978184, "calib/step_q_w_n": 871.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2525.0, "completions/max_terminated_length": 2525.0, "completions/mean_length": 644.9921875, "completions/mean_terminated_length": 652.6403198242188, "completions/min_length": 0.0, "completions/min_terminated_length": 239.0, "epoch": 0.0864, "grad_norm": 1.7586580514907837, "learning_rate": 3.3333333333333333e-06, "loss": -0.0175, "num_tokens": 20606475.0, "reward": 1.2109375, "reward_std": 0.17270183563232422, "rewards/accuracy_reward_step": 0.72265625, "rewards/format_reward_step": 0.9765625, "step": 81 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 10.63671875, "calib/ece": 0.07736259842519684, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": -0.018181593462717083, "calib/mean_conf": 0.7222688976377952, "calib/mu_c": 0.7158981818181818, "calib/mu_w": 0.7340797752808988, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07501259842519682, "calib/std_conf": 0.055923123705016065, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6629955128205127, "calib/step_q_c_n": 1560.0, "calib/step_q_gap": -0.021624865511387537, "calib/step_q_w": 0.6846203783319003, "calib/step_q_w_n": 1163.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 634.265625, "completions/mean_terminated_length": 636.7529907226562, "completions/min_length": 0.0, "completions/min_terminated_length": 234.0, "epoch": 0.08746666666666666, "grad_norm": 0.3052951991558075, "learning_rate": 3.3055555555555558e-06, "loss": 0.0178, "num_tokens": 20874399.0, "reward": 1.140625, "reward_std": 0.19871768355369568, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.9921875, "step": 82 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 12.33203125, "calib/ece": 0.13502213438735175, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.019762845849802372, "calib/gap": -0.02882232035327148, "calib/mean_conf": 0.7339501976284584, "calib/mu_c": 0.7232415094339623, "calib/mu_w": 0.7520638297872337, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1202569169960474, "calib/std_conf": 0.06278977197273311, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6634853221957041, "calib/step_q_c_n": 1676.0, "calib/step_q_gap": -0.028898202449805677, "calib/step_q_w": 0.6923835246455098, "calib/step_q_w_n": 1481.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2434.0, "completions/max_terminated_length": 2434.0, "completions/mean_length": 747.3125, "completions/mean_terminated_length": 753.1968383789062, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.08853333333333334, "grad_norm": 0.2530120611190796, "learning_rate": 3.277777777777778e-06, "loss": -0.002, "num_tokens": 21172975.0, "reward": 1.115234375, "reward_std": 0.1734684705734253, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.98828125, "step": 83 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 10.45703125, "calib/ece": 0.09926023437500002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.0005148772926892864, "calib/mean_conf": 0.716447734375, "calib/mu_c": 0.7162506329113923, "calib/mu_w": 0.7167655102040816, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09926023437500002, "calib/std_conf": 0.05594260874317485, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6651108609271524, "calib/step_q_c_n": 1510.0, "calib/step_q_gap": -0.0043531236486830105, "calib/step_q_w": 0.6694639845758354, "calib/step_q_w_n": 1167.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1671.0, "completions/max_terminated_length": 1671.0, "completions/mean_length": 609.6953125, "completions/mean_terminated_length": 614.4960327148438, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.0896, "grad_norm": 0.5088642835617065, "learning_rate": 3.2500000000000002e-06, "loss": 0.0052, "num_tokens": 21434977.0, "reward": 1.1171875, "reward_std": 0.19530275464057922, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 1.0, "step": 84 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 11.0390625, "calib/ece": 0.08913231075697205, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.011304903846153791, "calib/mean_conf": 0.7246701593625497, "calib/mu_c": 0.72876875, "calib/mu_w": 0.7174638461538462, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08817613545816728, "calib/std_conf": 0.05422468456992324, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.667843023255814, "calib/step_q_c_n": 1720.0, "calib/step_q_gap": -0.002186316708019609, "calib/step_q_w": 0.6700293399638336, "calib/step_q_w_n": 1106.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2519.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 682.765625, "completions/mean_terminated_length": 690.8616943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.09066666666666667, "grad_norm": 0.3552238643169403, "learning_rate": 3.2222222222222227e-06, "loss": -0.0124, "num_tokens": 21717589.0, "reward": 1.11328125, "reward_std": 0.25730133056640625, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.9765625, "step": 85 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 11.50390625, "calib/ece": 0.1369408730158731, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": -0.014084684210526555, "calib/mean_conf": 0.7401154761904762, "calib/mu_c": 0.7345263157894737, "calib/mu_w": 0.7486110000000002, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1369408730158731, "calib/std_conf": 0.06631348260175485, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.674241186345831, "calib/step_q_c_n": 1787.0, "calib/step_q_gap": -0.004901300700801081, "calib/step_q_w": 0.679142487046632, "calib/step_q_w_n": 1158.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 766.328125, "completions/mean_terminated_length": 769.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.09173333333333333, "grad_norm": 0.34955188632011414, "learning_rate": 3.1944444444444443e-06, "loss": 0.0287, "num_tokens": 22019281.0, "reward": 1.080078125, "reward_std": 0.19237485527992249, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.97265625, "step": 86 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 9.87109375, "calib/ece": 0.10698042968750003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015625, "calib/gap": 0.006787857484457094, "calib/mean_conf": 0.7179648046875, "calib/mu_c": 0.7193170731707316, "calib/mu_w": 0.7125292156862745, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0120819921875, "calib/std_conf": 0.05209170035673249, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6690812720848056, "calib/step_q_c_n": 1981.0, "calib/step_q_gap": 0.00742588197491556, "calib/step_q_w": 0.66165539010989, "calib/step_q_w_n": 546.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1944.0, "completions/max_terminated_length": 1944.0, "completions/mean_length": 558.4609375, "completions/mean_terminated_length": 562.8582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.0928, "grad_norm": 0.40007147192955017, "learning_rate": 3.1666666666666667e-06, "loss": -0.0021, "num_tokens": 22267743.0, "reward": 1.296875, "reward_std": 0.2075463831424713, "rewards/accuracy_reward_step": 0.80078125, "rewards/format_reward_step": 0.9921875, "step": 87 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 11.15625, "calib/ece": 0.05912992125984258, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": -0.009182278481012718, "calib/mean_conf": 0.741255905511811, "calib/mu_c": 0.7384, "calib/mu_w": 0.7475822784810127, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.055704724409448875, "calib/std_conf": 0.06912001334383905, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.673917751171265, "calib/step_q_c_n": 1921.0, "calib/step_q_gap": -0.015334976101462283, "calib/step_q_w": 0.6892527272727272, "calib/step_q_w_n": 935.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2875.0, "completions/max_terminated_length": 2875.0, "completions/mean_length": 725.66015625, "completions/mean_terminated_length": 731.3740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.09386666666666667, "grad_norm": 0.5738656520843506, "learning_rate": 3.138888888888889e-06, "loss": 0.0035, "num_tokens": 22563360.0, "reward": 1.177734375, "reward_std": 0.21606913208961487, "rewards/accuracy_reward_step": 0.68359375, "rewards/format_reward_step": 0.98828125, "step": 88 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 11.77734375, "calib/ece": 0.14020318725099604, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": -0.017074783521385584, "calib/mean_conf": 0.7298446215139442, "calib/mu_c": 0.7228378378378378, "calib/mu_w": 0.7399126213592234, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.14020318725099604, "calib/std_conf": 0.06890072945100548, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.660207026348808, "calib/step_q_c_n": 1594.0, "calib/step_q_gap": -0.030072354368996357, "calib/step_q_w": 0.6902793807178044, "calib/step_q_w_n": 1421.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2518.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 767.21875, "completions/mean_terminated_length": 776.3162231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.09493333333333333, "grad_norm": 0.2824539840221405, "learning_rate": 3.1111111111111116e-06, "loss": -0.0148, "num_tokens": 22868656.0, "reward": 1.06640625, "reward_std": 0.2012740671634674, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9765625, "step": 89 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 11.89453125, "calib/ece": 0.06254980079681267, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": -0.002347365278399627, "calib/mean_conf": 0.7262948207171314, "calib/mu_c": 0.7255747126436782, "calib/mu_w": 0.7279220779220779, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04780876494023897, "calib/std_conf": 0.07368648411549109, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.660236537493709, "calib/step_q_c_n": 1987.0, "calib/step_q_gap": -0.015680286702888346, "calib/step_q_w": 0.6759168241965974, "calib/step_q_w_n": 1058.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1878.0, "completions/max_terminated_length": 1878.0, "completions/mean_length": 694.2421875, "completions/mean_terminated_length": 708.0717163085938, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.096, "grad_norm": 0.36388516426086426, "learning_rate": 3.0833333333333336e-06, "loss": -0.0381, "num_tokens": 23149702.0, "reward": 1.169921875, "reward_std": 0.21346309781074524, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.98046875, "step": 90 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 11.84375, "calib/ece": 0.10066929133858277, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010466200466200615, "calib/mean_conf": 0.6818503937007874, "calib/mu_c": 0.6786363636363636, "calib/mu_w": 0.6891025641025642, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04480314960629926, "calib/std_conf": 0.06787640211746589, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6107117969138874, "calib/step_q_c_n": 2009.0, "calib/step_q_gap": -0.022455358511332513, "calib/step_q_w": 0.63316715542522, "calib/step_q_w_n": 1023.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1942.0, "completions/max_terminated_length": 1942.0, "completions/mean_length": 734.11328125, "completions/mean_terminated_length": 742.8182373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 334.0, "epoch": 0.09706666666666666, "grad_norm": 0.3484867215156555, "learning_rate": 3.055555555555556e-06, "loss": -0.0073, "num_tokens": 23445347.0, "reward": 1.18359375, "reward_std": 0.2495465874671936, "rewards/accuracy_reward_step": 0.6875, "rewards/format_reward_step": 0.9921875, "step": 91 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 10.3359375, "calib/ece": 0.14858203125000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.028630628535999292, "calib/mean_conf": 0.63883984375, "calib/mu_c": 0.6311229946524065, "calib/mu_w": 0.6597536231884058, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02847656250000002, "calib/std_conf": 0.05463817180987653, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.5719755147468003, "calib/step_q_c_n": 1797.0, "calib/step_q_gap": -0.019971481719630813, "calib/step_q_w": 0.5919469964664311, "calib/step_q_w_n": 849.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1580.0, "completions/max_terminated_length": 1580.0, "completions/mean_length": 622.1640625, "completions/mean_terminated_length": 627.06298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.09813333333333334, "grad_norm": 0.3795936703681946, "learning_rate": 3.0277777777777776e-06, "loss": -0.0023, "num_tokens": 23711341.0, "reward": 1.228515625, "reward_std": 0.17531254887580872, "rewards/accuracy_reward_step": 0.73046875, "rewards/format_reward_step": 0.99609375, "step": 92 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 10.91015625, "calib/ece": 0.1233858267716535, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.0200209542230817, "calib/mean_conf": 0.6407874015748031, "calib/mu_c": 0.6355851063829788, "calib/mu_w": 0.6556060606060605, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012007874015748009, "calib/std_conf": 0.06036120017816887, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5698559670781893, "calib/step_q_c_n": 1944.0, "calib/step_q_gap": -0.017258284983059258, "calib/step_q_w": 0.5871142520612486, "calib/step_q_w_n": 849.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 694.6953125, "completions/mean_terminated_length": 702.932861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.0992, "grad_norm": 0.3373625576496124, "learning_rate": 3e-06, "loss": -0.0173, "num_tokens": 23994959.0, "reward": 1.23046875, "reward_std": 0.19140824675559998, "rewards/accuracy_reward_step": 0.734375, "rewards/format_reward_step": 0.9921875, "step": 93 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 9.92578125, "calib/ece": 0.09505882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.012319182389937122, "calib/mean_conf": 0.620235294117647, "calib/mu_c": 0.6155974842767296, "calib/mu_w": 0.6279166666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04588235294117648, "calib/std_conf": 0.05204017612187577, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.564599589322382, "calib/step_q_c_n": 1461.0, "calib/step_q_gap": -0.01198374401095137, "calib/step_q_w": 0.5765833333333333, "calib/step_q_w_n": 1080.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2383.0, "completions/max_terminated_length": 2383.0, "completions/mean_length": 617.9765625, "completions/mean_terminated_length": 622.842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.10026666666666667, "grad_norm": 0.2551957368850708, "learning_rate": 2.9722222222222225e-06, "loss": -0.0174, "num_tokens": 24261841.0, "reward": 1.119140625, "reward_std": 0.15191996097564697, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.99609375, "step": 94 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 11.12890625, "calib/ece": 0.12202380952380956, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011131101250504094, "calib/mean_conf": 0.6475000000000001, "calib/mu_c": 0.6445405405405406, "calib/mu_w": 0.6556716417910446, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017698412698412692, "calib/std_conf": 0.05655889611863664, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5712020725388601, "calib/step_q_c_n": 1930.0, "calib/step_q_gap": -0.01644754661239123, "calib/step_q_w": 0.5876496191512514, "calib/step_q_w_n": 919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 749.59375, "completions/mean_terminated_length": 755.4960327148438, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.10133333333333333, "grad_norm": 0.43740609288215637, "learning_rate": 2.944444444444445e-06, "loss": -0.024, "num_tokens": 24559865.0, "reward": 1.21484375, "reward_std": 0.23373465240001678, "rewards/accuracy_reward_step": 0.72265625, "rewards/format_reward_step": 0.984375, "step": 95 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.90625, "calib/ece": 0.17896825396825397, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.032203846153846194, "calib/mean_conf": 0.6380952380952382, "calib/mu_c": 0.63145, "calib/mu_w": 0.6636538461538461, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011706349206349186, "calib/std_conf": 0.0528732279257898, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5689752306945118, "calib/step_q_c_n": 2059.0, "calib/step_q_gap": -0.01829625634505161, "calib/step_q_w": 0.5872714870395634, "calib/step_q_w_n": 733.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 674.65234375, "completions/mean_terminated_length": 679.9645385742188, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.1024, "grad_norm": 0.4047119617462158, "learning_rate": 2.916666666666667e-06, "loss": 0.0177, "num_tokens": 24838392.0, "reward": 1.2734375, "reward_std": 0.18096905946731567, "rewards/accuracy_reward_step": 0.78125, "rewards/format_reward_step": 0.984375, "step": 96 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.21484375, "calib/ece": 0.04666666666666672, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.0028923719958201533, "calib/mean_conf": 0.633968253968254, "calib/mu_c": 0.632969696969697, "calib/mu_w": 0.6358620689655171, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012936507936507934, "calib/std_conf": 0.05331218874122451, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.565439459127228, "calib/step_q_c_n": 1627.0, "calib/step_q_gap": -0.020957302006375245, "calib/step_q_w": 0.5863967611336033, "calib/step_q_w_n": 988.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2204.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 662.5390625, "completions/mean_terminated_length": 673.0556030273438, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.10346666666666667, "grad_norm": 0.3296211063861847, "learning_rate": 2.888888888888889e-06, "loss": -0.0314, "num_tokens": 25113074.0, "reward": 1.13671875, "reward_std": 0.2406836599111557, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.984375, "step": 97 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 10.93359375, "calib/ece": 0.05466135458167331, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.005932300631095844, "calib/mean_conf": 0.6456573705179283, "calib/mu_c": 0.6476190476190476, "calib/mu_w": 0.6416867469879518, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0154980079681275, "calib/std_conf": 0.05651804118633278, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5745659526493799, "calib/step_q_c_n": 1774.0, "calib/step_q_gap": -0.004487705887205373, "calib/step_q_w": 0.5790536585365853, "calib/step_q_w_n": 1025.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2535.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 750.25390625, "completions/mean_terminated_length": 759.1502075195312, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.10453333333333334, "grad_norm": 0.5152229070663452, "learning_rate": 2.861111111111111e-06, "loss": -0.0273, "num_tokens": 25411323.0, "reward": 1.146484375, "reward_std": 0.2555645704269409, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.98046875, "step": 98 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 13.6875, "calib/ece": 0.11594337349397595, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": -0.029866776359088965, "calib/mean_conf": 0.6661441767068274, "calib/mu_c": 0.6533098591549297, "calib/mu_w": 0.6831766355140186, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10590321285140564, "calib/std_conf": 0.0799204432245325, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5810451453308597, "calib/step_q_c_n": 1617.0, "calib/step_q_gap": -0.05463529982017368, "calib/step_q_w": 0.6356804451510334, "calib/step_q_w_n": 1887.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2943.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 885.05078125, "completions/mean_terminated_length": 895.5454711914062, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.1056, "grad_norm": 0.9046245217323303, "learning_rate": 2.8333333333333335e-06, "loss": -0.0087, "num_tokens": 25743696.0, "reward": 1.041015625, "reward_std": 0.2987309396266937, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.97265625, "step": 99 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 12.90625, "calib/ece": 0.10766666666666669, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.008333333333333333, "calib/gap": -0.031806100875868326, "calib/mean_conf": 0.6669166666666667, "calib/mu_c": 0.6555194805194805, "calib/mu_w": 0.6873255813953488, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06645833333333337, "calib/std_conf": 0.06673699415533253, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5753517587939698, "calib/step_q_c_n": 1592.0, "calib/step_q_gap": -0.0632814187761237, "calib/step_q_w": 0.6386331775700935, "calib/step_q_w_n": 1712.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 829.94140625, "completions/mean_terminated_length": 881.5975341796875, "completions/min_length": 0.0, "completions/min_terminated_length": 311.0, "epoch": 0.10666666666666667, "grad_norm": 0.6749600768089294, "learning_rate": 2.805555555555556e-06, "loss": -0.104, "num_tokens": 26063569.0, "reward": 1.0703125, "reward_std": 0.24402156472206116, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.9375, "step": 100 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 11.8828125, "calib/ece": 0.08617283950617284, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.024691358024691357, "calib/gap": 0.008903061224489806, "calib/mean_conf": 0.6758024691358024, "calib/mu_c": 0.6793197278911565, "calib/mu_w": 0.6704166666666667, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07851851851851853, "calib/std_conf": 0.07632286085147473, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.587848347375243, "calib/step_q_c_n": 1543.0, "calib/step_q_gap": -0.039883473838899675, "calib/step_q_w": 0.6277318212141427, "calib/step_q_w_n": 1499.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 927.57421875, "completions/mean_terminated_length": 949.8360595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 309.0, "epoch": 0.10773333333333333, "grad_norm": 0.6551363468170166, "learning_rate": 2.7777777777777783e-06, "loss": -0.0233, "num_tokens": 26408020.0, "reward": 1.048828125, "reward_std": 0.27887511253356934, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.94921875, "step": 101 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 11.37109375, "calib/ece": 0.0396761133603239, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.003409460458240976, "calib/mean_conf": 0.6502834008097166, "calib/mu_c": 0.6491515151515151, "calib/mu_w": 0.6525609756097561, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010971659919028343, "calib/std_conf": 0.06104621472093361, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5718552875695733, "calib/step_q_c_n": 1617.0, "calib/step_q_gap": -0.05240553159580541, "calib/step_q_w": 0.6242608191653787, "calib/step_q_w_n": 1294.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 728.40625, "completions/mean_terminated_length": 745.8880615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.1088, "grad_norm": 0.5937004089355469, "learning_rate": 2.7500000000000004e-06, "loss": -0.0515, "num_tokens": 26701188.0, "reward": 1.126953125, "reward_std": 0.18155640363693237, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.96484375, "step": 102 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 11.8828125, "calib/ece": 0.15508474576271183, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.01694915254237288, "calib/gap": -0.045967343687773665, "calib/mean_conf": 0.6633898305084746, "calib/mu_c": 0.6476129032258064, "calib/mu_w": 0.6935802469135801, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08084745762711865, "calib/std_conf": 0.0752591042860422, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5754819277108434, "calib/step_q_c_n": 1660.0, "calib/step_q_gap": -0.041833557093787554, "calib/step_q_w": 0.6173154848046309, "calib/step_q_w_n": 1382.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 894.046875, "completions/mean_terminated_length": 938.016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 336.0, "epoch": 0.10986666666666667, "grad_norm": 0.4124567210674286, "learning_rate": 2.7222222222222224e-06, "loss": -0.0684, "num_tokens": 27034616.0, "reward": 1.07421875, "reward_std": 0.23740246891975403, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.921875, "step": 103 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 11.72265625, "calib/ece": 0.08705394190871377, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.012448132780082987, "calib/gap": -0.01802863591479753, "calib/mean_conf": 0.6638174273858921, "calib/mu_c": 0.6561870503597123, "calib/mu_w": 0.6742156862745098, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08705394190871377, "calib/std_conf": 0.06525048979339483, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5755367231638419, "calib/step_q_c_n": 1416.0, "calib/step_q_gap": -0.04505633677306675, "calib/step_q_w": 0.6205930599369086, "calib/step_q_w_n": 1585.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2775.0, "completions/max_terminated_length": 2775.0, "completions/mean_length": 834.640625, "completions/mean_terminated_length": 872.1142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.11093333333333333, "grad_norm": 0.4785301983356476, "learning_rate": 2.6944444444444444e-06, "loss": -0.0446, "num_tokens": 27354964.0, "reward": 1.013671875, "reward_std": 0.2309260368347168, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.94140625, "step": 104 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 9.25, "calib/ece": 0.1797499999999999, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.004166666666666667, "calib/gap": -0.038034682080924975, "calib/mean_conf": 0.6525833333333334, "calib/mu_c": 0.6419653179190752, "calib/mu_w": 0.6800000000000002, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.055749999999999994, "calib/std_conf": 0.05867986357251429, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5645699937225361, "calib/step_q_c_n": 1593.0, "calib/step_q_gap": -0.01497839337423812, "calib/step_q_w": 0.5795483870967743, "calib/step_q_w_n": 775.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 840.31640625, "completions/mean_terminated_length": 870.9352416992188, "completions/min_length": 0.0, "completions/min_terminated_length": 302.0, "epoch": 0.112, "grad_norm": 0.31713542342185974, "learning_rate": 2.666666666666667e-06, "loss": -0.023, "num_tokens": 27675845.0, "reward": 1.14453125, "reward_std": 0.2606049180030823, "rewards/accuracy_reward_step": 0.67578125, "rewards/format_reward_step": 0.9375, "step": 105 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.19140625, "calib/ece": 0.07927999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.027988570669147173, "calib/mean_conf": 0.6452, "calib/mu_c": 0.6338926174496645, "calib/mu_w": 0.6618811881188117, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06423999999999995, "calib/std_conf": 0.05042380390252208, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5630168453292496, "calib/step_q_c_n": 1306.0, "calib/step_q_gap": -0.031081389513421098, "calib/step_q_w": 0.5940982348426707, "calib/step_q_w_n": 1303.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2632.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 782.90234375, "completions/mean_terminated_length": 795.3294067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 295.0, "epoch": 0.11306666666666666, "grad_norm": 0.3023886978626251, "learning_rate": 2.6388888888888893e-06, "loss": -0.0247, "num_tokens": 27980852.0, "reward": 1.0703125, "reward_std": 0.16276384890079498, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.9765625, "step": 106 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 10.2734375, "calib/ece": 0.14872509960159358, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.005709268590624705, "calib/mean_conf": 0.6515936254980079, "calib/mu_c": 0.6532768361581921, "calib/mu_w": 0.6475675675675674, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.047569721115537825, "calib/std_conf": 0.056390872287046694, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5655207706201084, "calib/step_q_c_n": 1661.0, "calib/step_q_gap": -0.04727592700631056, "calib/step_q_w": 0.612796697626419, "calib/step_q_w_n": 969.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2508.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 800.51953125, "completions/mean_terminated_length": 813.2262573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.11413333333333334, "grad_norm": 0.30182257294654846, "learning_rate": 2.6111111111111113e-06, "loss": 0.0129, "num_tokens": 28290401.0, "reward": 1.181640625, "reward_std": 0.21702617406845093, "rewards/accuracy_reward_step": 0.69140625, "rewards/format_reward_step": 0.98046875, "step": 107 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 10.63671875, "calib/ece": 0.1460080645161291, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009232899189826838, "calib/mean_conf": 0.6650403225806452, "calib/mu_c": 0.66321608040201, "calib/mu_w": 0.6724489795918368, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004314516129032264, "calib/std_conf": 0.0597778008199004, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5796858124693177, "calib/step_q_c_n": 2037.0, "calib/step_q_gap": -0.003389989279953487, "calib/step_q_w": 0.5830758017492712, "calib/step_q_w_n": 686.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 862.4609375, "completions/mean_terminated_length": 886.706787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.1152, "grad_norm": 0.2606714367866516, "learning_rate": 2.5833333333333337e-06, "loss": -0.0707, "num_tokens": 28614423.0, "reward": 1.26171875, "reward_std": 0.16206827759742737, "rewards/accuracy_reward_step": 0.77734375, "rewards/format_reward_step": 0.96875, "step": 108 }, { "calib/answer_extract_rate": 0.91796875, "calib/avg_num_step_conf": 11.21875, "calib/ece": 0.12272340425531918, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.00851063829787234, "calib/gap": -0.02565572541966432, "calib/mean_conf": 0.6565957446808511, "calib/mu_c": 0.6461151079136691, "calib/mu_w": 0.6717708333333334, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09391489361702125, "calib/std_conf": 0.06526139244296268, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5648293515358362, "calib/step_q_c_n": 1172.0, "calib/step_q_gap": -0.06726806022886955, "calib/step_q_w": 0.6320974117647058, "calib/step_q_w_n": 1700.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 821.3515625, "completions/mean_terminated_length": 876.1083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 0.11626666666666667, "grad_norm": 0.2767605185508728, "learning_rate": 2.5555555555555557e-06, "loss": -0.0747, "num_tokens": 28929289.0, "reward": 1.001953125, "reward_std": 0.20017710328102112, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.91796875, "step": 109 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 8.90234375, "calib/ece": 0.0916269841269841, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.008398117218341938, "calib/mean_conf": 0.6412301587301587, "calib/mu_c": 0.6387640449438201, "calib/mu_w": 0.647162162162162, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013253968253968247, "calib/std_conf": 0.054486005586175154, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.567722705961152, "calib/step_q_c_n": 1493.0, "calib/step_q_gap": -0.023867624827652034, "calib/step_q_w": 0.5915903307888041, "calib/step_q_w_n": 786.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 749.65234375, "completions/mean_terminated_length": 758.54150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.11733333333333333, "grad_norm": 0.33444392681121826, "learning_rate": 2.5277777777777778e-06, "loss": -0.0207, "num_tokens": 29226120.0, "reward": 1.1875, "reward_std": 0.21375533938407898, "rewards/accuracy_reward_step": 0.6953125, "rewards/format_reward_step": 0.984375, "step": 110 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 9.0, "calib/ece": 0.17157894736842103, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.03143781094527354, "calib/mean_conf": 0.6361943319838056, "calib/mu_c": 0.6276666666666668, "calib/mu_w": 0.6591044776119404, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03951417004048582, "calib/std_conf": 0.05609501309554516, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.5615014367816092, "calib/step_q_c_n": 1392.0, "calib/step_q_gap": -0.041788036902601355, "calib/step_q_w": 0.6032894736842106, "calib/step_q_w_n": 912.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2341.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 783.45703125, "completions/mean_terminated_length": 805.4818725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 0.1184, "grad_norm": 0.29022684693336487, "learning_rate": 2.5e-06, "loss": -0.0271, "num_tokens": 29534093.0, "reward": 1.18359375, "reward_std": 0.2522103786468506, "rewards/accuracy_reward_step": 0.703125, "rewards/format_reward_step": 0.9609375, "step": 111 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.8984375, "calib/ece": 0.07762096774193554, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011590909090909096, "calib/mean_conf": 0.6503629032258064, "calib/mu_c": 0.64625, "calib/mu_w": 0.6578409090909091, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04141129032258063, "calib/std_conf": 0.05729336428916774, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5658276125095346, "calib/step_q_c_n": 1311.0, "calib/step_q_gap": -0.014096172392223383, "calib/step_q_w": 0.579923784901758, "calib/step_q_w_n": 967.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2238.0, "completions/max_terminated_length": 2238.0, "completions/mean_length": 860.03125, "completions/mean_terminated_length": 887.774169921875, "completions/min_length": 0.0, "completions/min_terminated_length": 375.0, "epoch": 0.11946666666666667, "grad_norm": 0.27546238899230957, "learning_rate": 2.4722222222222226e-06, "loss": -0.0407, "num_tokens": 29862181.0, "reward": 1.109375, "reward_std": 0.23978866636753082, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.96875, "step": 112 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 7.68359375, "calib/ece": 0.06102880658436222, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.00411522633744856, "calib/gap": -0.01115797546012276, "calib/mean_conf": 0.6287654320987656, "calib/mu_c": 0.6250920245398772, "calib/mu_w": 0.63625, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00950617283950617, "calib/std_conf": 0.05716302690171662, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5484249767008388, "calib/step_q_c_n": 1073.0, "calib/step_q_gap": -0.04518799869066015, "calib/step_q_w": 0.5936129753914989, "calib/step_q_w_n": 894.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 726.50390625, "completions/mean_terminated_length": 749.9395141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 283.0, "epoch": 0.12053333333333334, "grad_norm": 0.2952890694141388, "learning_rate": 2.4444444444444447e-06, "loss": -0.0271, "num_tokens": 30153366.0, "reward": 1.111328125, "reward_std": 0.22342383861541748, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.94921875, "step": 113 }, { "calib/answer_extract_rate": 0.92578125, "calib/avg_num_step_conf": 7.85546875, "calib/ece": 0.23105485232067513, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.026989010989010964, "calib/mean_conf": 0.6216033755274262, "calib/mu_c": 0.6168205128205129, "calib/mu_w": 0.6438095238095238, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014936708860759488, "calib/std_conf": 0.05321680508106623, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5515306122448981, "calib/step_q_c_n": 1274.0, "calib/step_q_gap": -0.08099991692742226, "calib/step_q_w": 0.6325305291723203, "calib/step_q_w_n": 737.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2523.0, "completions/max_terminated_length": 2523.0, "completions/mean_length": 683.72265625, "completions/mean_terminated_length": 729.30419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 251.0, "epoch": 0.1216, "grad_norm": 0.3420001268386841, "learning_rate": 2.4166666666666667e-06, "loss": -0.0587, "num_tokens": 30433423.0, "reward": 1.224609375, "reward_std": 0.18992431461811066, "rewards/accuracy_reward_step": 0.76171875, "rewards/format_reward_step": 0.92578125, "step": 114 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.0703125, "calib/ece": 0.06385542168674699, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001348588120740013, "calib/mean_conf": 0.6306827309236948, "calib/mu_c": 0.6301898734177215, "calib/mu_w": 0.6315384615384615, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030000000000000006, "calib/std_conf": 0.04578559846807956, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5574569319114028, "calib/step_q_c_n": 1219.0, "calib/step_q_gap": -0.02704129713228076, "calib/step_q_w": 0.5844982290436835, "calib/step_q_w_n": 847.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 732.890625, "completions/mean_terminated_length": 744.5238647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 337.0, "epoch": 0.12266666666666666, "grad_norm": 0.3784162700176239, "learning_rate": 2.388888888888889e-06, "loss": -0.0124, "num_tokens": 30726307.0, "reward": 1.103515625, "reward_std": 0.17752531170845032, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.97265625, "step": 115 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 7.40625, "calib/ece": 0.06744939271255054, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003655038759689977, "calib/mean_conf": 0.6301214574898786, "calib/mu_c": 0.6290116279069767, "calib/mu_w": 0.6326666666666667, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0006072874493927086, "calib/std_conf": 0.05397166584111702, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5624370430544273, "calib/step_q_c_n": 1231.0, "calib/step_q_gap": -0.013833633637302056, "calib/step_q_w": 0.5762706766917294, "calib/step_q_w_n": 665.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 802.13671875, "completions/mean_terminated_length": 821.3880615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.12373333333333333, "grad_norm": 0.31041327118873596, "learning_rate": 2.361111111111111e-06, "loss": -0.0078, "num_tokens": 31036174.0, "reward": 1.154296875, "reward_std": 0.23412039875984192, "rewards/accuracy_reward_step": 0.671875, "rewards/format_reward_step": 0.96484375, "step": 116 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.58984375, "calib/ece": 0.11896414342629488, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.017238835725677748, "calib/mean_conf": 0.6310756972111553, "calib/mu_c": 0.6242763157894737, "calib/mu_w": 0.6415151515151515, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07223107569721121, "calib/std_conf": 0.05846678362848867, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5513297297297297, "calib/step_q_c_n": 925.0, "calib/step_q_gap": -0.007476044548485605, "calib/step_q_w": 0.5588057742782153, "calib/step_q_w_n": 762.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2023.0, "completions/max_terminated_length": 2023.0, "completions/mean_length": 713.640625, "completions/mean_terminated_length": 730.7680053710938, "completions/min_length": 0.0, "completions/min_terminated_length": 256.0, "epoch": 0.1248, "grad_norm": 0.33048129081726074, "learning_rate": 2.3333333333333336e-06, "loss": -0.0263, "num_tokens": 31325466.0, "reward": 1.083984375, "reward_std": 0.2014136016368866, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.98046875, "step": 117 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.7109375, "calib/ece": 0.1069047619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.0239285714285713, "calib/mean_conf": 0.6409523809523808, "calib/mu_c": 0.6329761904761905, "calib/mu_w": 0.6569047619047618, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04059523809523809, "calib/std_conf": 0.059240277099613596, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5579061976549413, "calib/step_q_c_n": 1194.0, "calib/step_q_gap": -0.030504058755315144, "calib/step_q_w": 0.5884102564102565, "calib/step_q_w_n": 780.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 796.66015625, "completions/mean_terminated_length": 802.9330444335938, "completions/min_length": 0.0, "completions/min_terminated_length": 274.0, "epoch": 0.12586666666666665, "grad_norm": 0.5396471619606018, "learning_rate": 2.305555555555556e-06, "loss": -0.0079, "num_tokens": 31633419.0, "reward": 1.1484375, "reward_std": 0.20124970376491547, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.984375, "step": 118 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 8.42578125, "calib/ece": 0.07161507936507941, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": -0.005440476190476162, "calib/mean_conf": 0.6469325396825396, "calib/mu_c": 0.6451190476190476, "calib/mu_w": 0.6505595238095238, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02594047619047618, "calib/std_conf": 0.07483415136827791, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5731582840236688, "calib/step_q_c_n": 1352.0, "calib/step_q_gap": -0.0018603495167039963, "calib/step_q_w": 0.5750186335403727, "calib/step_q_w_n": 805.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2675.0, "completions/max_terminated_length": 2675.0, "completions/mean_length": 833.93359375, "completions/mean_terminated_length": 840.5, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.12693333333333334, "grad_norm": 0.5779143571853638, "learning_rate": 2.277777777777778e-06, "loss": -0.019, "num_tokens": 31951970.0, "reward": 1.1484375, "reward_std": 0.23095275461673737, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.984375, "step": 119 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.29296875, "calib/ece": 0.0986693548387097, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.019189539308557935, "calib/mean_conf": 0.6338306451612903, "calib/mu_c": 0.6284916201117319, "calib/mu_w": 0.6476811594202898, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005362903225806452, "calib/std_conf": 0.042162363161067036, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5576158940397351, "calib/step_q_c_n": 1359.0, "calib/step_q_gap": -0.0209443153843486, "calib/step_q_w": 0.5785602094240837, "calib/step_q_w_n": 764.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2951.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 764.48828125, "completions/mean_terminated_length": 773.5534057617188, "completions/min_length": 0.0, "completions/min_terminated_length": 320.0, "epoch": 0.128, "grad_norm": 0.2819494605064392, "learning_rate": 2.25e-06, "loss": 0.0236, "num_tokens": 32254367.0, "reward": 1.18359375, "reward_std": 0.1648695021867752, "rewards/accuracy_reward_step": 0.69921875, "rewards/format_reward_step": 0.96875, "step": 120 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.04296875, "calib/ece": 0.10322580645161307, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010199800199800269, "calib/mean_conf": 0.6502419354838709, "calib/mu_c": 0.6475274725274724, "calib/mu_w": 0.6577272727272727, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009798387096774209, "calib/std_conf": 0.06128284122421192, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5725629438347322, "calib/step_q_c_n": 1549.0, "calib/step_q_gap": -0.013585881230542052, "calib/step_q_w": 0.5861488250652742, "calib/step_q_w_n": 766.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2167.0, "completions/max_terminated_length": 2167.0, "completions/mean_length": 828.71875, "completions/mean_terminated_length": 855.4515991210938, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.12906666666666666, "grad_norm": 0.3823467791080475, "learning_rate": 2.222222222222222e-06, "loss": -0.0604, "num_tokens": 32571575.0, "reward": 1.1953125, "reward_std": 0.2390308976173401, "rewards/accuracy_reward_step": 0.7109375, "rewards/format_reward_step": 0.96875, "step": 121 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 9.24609375, "calib/ece": 0.13908333333333334, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.004166666666666667, "calib/gap": -0.014493844049247584, "calib/mean_conf": 0.6385833333333333, "calib/mu_c": 0.6344767441860465, "calib/mu_w": 0.6489705882352941, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030500000000000013, "calib/std_conf": 0.05275408093745504, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5678004216444132, "calib/step_q_c_n": 1423.0, "calib/step_q_gap": -0.034244069881010364, "calib/step_q_w": 0.6020444915254236, "calib/step_q_w_n": 944.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2797.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 725.13671875, "completions/mean_terminated_length": 770.269775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 346.0, "epoch": 0.13013333333333332, "grad_norm": 0.3606873154640198, "learning_rate": 2.1944444444444445e-06, "loss": -0.0522, "num_tokens": 32864554.0, "reward": 1.140625, "reward_std": 0.1336795687675476, "rewards/accuracy_reward_step": 0.671875, "rewards/format_reward_step": 0.9375, "step": 122 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 9.19140625, "calib/ece": 0.05191056910569104, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.024390243902439025, "calib/gap": -8.377425044092668e-05, "calib/mean_conf": 0.664349593495935, "calib/mu_c": 0.664320987654321, "calib/mu_w": 0.6644047619047619, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.028861788617886158, "calib/std_conf": 0.08075472084565177, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5770537491705374, "calib/step_q_c_n": 1507.0, "calib/step_q_gap": -0.017237030971306577, "calib/step_q_w": 0.594290780141844, "calib/step_q_w_n": 846.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2890.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 900.34375, "completions/mean_terminated_length": 933.1498413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 335.0, "epoch": 0.1312, "grad_norm": 0.40366265177726746, "learning_rate": 2.166666666666667e-06, "loss": -0.0764, "num_tokens": 33200330.0, "reward": 1.11328125, "reward_std": 0.28023582696914673, "rewards/accuracy_reward_step": 0.6328125, "rewards/format_reward_step": 0.9609375, "step": 123 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.88671875, "calib/ece": 0.10948000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": -0.02430859156057341, "calib/mean_conf": 0.6535599999999999, "calib/mu_c": 0.6468508287292817, "calib/mu_w": 0.6711594202898551, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019520000000000003, "calib/std_conf": 0.06392281595799734, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5701692708333334, "calib/step_q_c_n": 1536.0, "calib/step_q_gap": -0.013836141886558462, "calib/step_q_w": 0.5840054127198918, "calib/step_q_w_n": 739.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 829.16015625, "completions/mean_terminated_length": 845.6773071289062, "completions/min_length": 0.0, "completions/min_terminated_length": 319.0, "epoch": 0.13226666666666667, "grad_norm": 0.22802260518074036, "learning_rate": 2.138888888888889e-06, "loss": -0.0256, "num_tokens": 33519411.0, "reward": 1.1953125, "reward_std": 0.17892926931381226, "rewards/accuracy_reward_step": 0.70703125, "rewards/format_reward_step": 0.9765625, "step": 124 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 9.03125, "calib/ece": 0.08841897233201577, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.0006696195935385063, "calib/mean_conf": 0.6468379446640317, "calib/mu_c": 0.6471052631578948, "calib/mu_w": 0.6464356435643563, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0672332015810276, "calib/std_conf": 0.0648644179068933, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5699325337331334, "calib/step_q_c_n": 1334.0, "calib/step_q_gap": 8.590796626228858e-05, "calib/step_q_w": 0.5698466257668711, "calib/step_q_w_n": 978.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2461.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 806.55078125, "completions/mean_terminated_length": 816.1146850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.13333333333333333, "grad_norm": 0.2709088623523712, "learning_rate": 2.1111111111111114e-06, "loss": -0.0183, "num_tokens": 33830696.0, "reward": 1.087890625, "reward_std": 0.2635945975780487, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.98828125, "step": 125 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 9.6328125, "calib/ece": 0.1561570247933885, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.004132231404958678, "calib/gap": -0.04367865185990527, "calib/mean_conf": 0.6445041322314048, "calib/mu_c": 0.6302453987730061, "calib/mu_w": 0.6739240506329114, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06355371900826447, "calib/std_conf": 0.05899259289082415, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5620327421555252, "calib/step_q_c_n": 1466.0, "calib/step_q_gap": -0.061277257844474686, "calib/step_q_w": 0.6233099999999999, "calib/step_q_w_n": 1000.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 783.8984375, "completions/mean_terminated_length": 819.0938720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 306.0, "epoch": 0.1344, "grad_norm": 0.4443349242210388, "learning_rate": 2.0833333333333334e-06, "loss": -0.0611, "num_tokens": 34136838.0, "reward": 1.109375, "reward_std": 0.26804327964782715, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.9453125, "step": 126 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 9.62890625, "calib/ece": 0.07832653061224497, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.01936545664073752, "calib/mean_conf": 0.6517142857142858, "calib/mu_c": 0.6446794871794871, "calib/mu_w": 0.6640449438202246, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.046653061224489804, "calib/std_conf": 0.0549166344079267, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5608229426433916, "calib/step_q_c_n": 1203.0, "calib/step_q_gap": -0.04648292106500784, "calib/step_q_w": 0.6073058637083995, "calib/step_q_w_n": 1262.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2831.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 780.3515625, "completions/mean_terminated_length": 812.0731201171875, "completions/min_length": 0.0, "completions/min_terminated_length": 292.0, "epoch": 0.13546666666666668, "grad_norm": 0.25136062502861023, "learning_rate": 2.0555555555555555e-06, "loss": -0.0525, "num_tokens": 34440280.0, "reward": 1.087890625, "reward_std": 0.2381621152162552, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.95703125, "step": 127 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.46875, "calib/ece": 0.12346774193548388, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.033895837749982505, "calib/mean_conf": 0.6504032258064515, "calib/mu_c": 0.6382389937106917, "calib/mu_w": 0.6721348314606742, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06637096774193549, "calib/std_conf": 0.06796463968608007, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5672322070452911, "calib/step_q_c_n": 1391.0, "calib/step_q_gap": -0.018198577078619826, "calib/step_q_w": 0.5854307841239109, "calib/step_q_w_n": 1033.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 781.79296875, "completions/mean_terminated_length": 807.0120849609375, "completions/min_length": 0.0, "completions/min_terminated_length": 291.0, "epoch": 0.13653333333333334, "grad_norm": 0.31424975395202637, "learning_rate": 2.027777777777778e-06, "loss": -0.045, "num_tokens": 34747083.0, "reward": 1.10546875, "reward_std": 0.2466757893562317, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.96875, "step": 128 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.7421875, "calib/ece": 0.021764705882352867, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": 0.02230303030303027, "calib/mean_conf": 0.6444313725490196, "calib/mu_c": 0.6523030303030303, "calib/mu_w": 0.63, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009568627450980395, "calib/std_conf": 0.06580698303191819, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5802972972972973, "calib/step_q_c_n": 1480.0, "calib/step_q_gap": 0.016247165371175876, "calib/step_q_w": 0.5640501319261214, "calib/step_q_w_n": 758.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2272.0, "completions/max_terminated_length": 2272.0, "completions/mean_length": 745.8671875, "completions/mean_terminated_length": 748.7921752929688, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.1376, "grad_norm": 0.3294617235660553, "learning_rate": 2.0000000000000003e-06, "loss": 0.0198, "num_tokens": 35040409.0, "reward": 1.142578125, "reward_std": 0.21977511048316956, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.99609375, "step": 129 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.9296875, "calib/ece": 0.13968253968253969, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.03529761904761908, "calib/mean_conf": 0.6415873015873016, "calib/mu_c": 0.6298214285714285, "calib/mu_w": 0.6651190476190476, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.057301587301587305, "calib/std_conf": 0.04980776854070341, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5638549361987911, "calib/step_q_c_n": 1489.0, "calib/step_q_gap": -0.029535377191522327, "calib/step_q_w": 0.5933903133903135, "calib/step_q_w_n": 1053.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 718.37109375, "completions/mean_terminated_length": 726.8893432617188, "completions/min_length": 0.0, "completions/min_terminated_length": 276.0, "epoch": 0.13866666666666666, "grad_norm": 0.5593166351318359, "learning_rate": 1.9722222222222224e-06, "loss": -0.0497, "num_tokens": 35329600.0, "reward": 1.1484375, "reward_std": 0.12416224181652069, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.984375, "step": 130 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 11.15625, "calib/ece": 0.17669387755102034, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.024859370834444228, "calib/mean_conf": 0.6449387755102041, "calib/mu_c": 0.6326612903225806, "calib/mu_w": 0.6575206611570248, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15775510204081622, "calib/std_conf": 0.057906472150079845, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5670183852917666, "calib/step_q_c_n": 1251.0, "calib/step_q_gap": -0.031018997885803423, "calib/step_q_w": 0.59803738317757, "calib/step_q_w_n": 1605.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 752.140625, "completions/mean_terminated_length": 779.5465698242188, "completions/min_length": 0.0, "completions/min_terminated_length": 370.0, "epoch": 0.13973333333333332, "grad_norm": 0.8481810092926025, "learning_rate": 1.944444444444445e-06, "loss": -0.0373, "num_tokens": 35628356.0, "reward": 0.962890625, "reward_std": 0.2541370987892151, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.95703125, "step": 131 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.69921875, "calib/ece": 0.17771811023622053, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.010880097087378693, "calib/mean_conf": 0.6447259842519685, "calib/mu_c": 0.6426699029126213, "calib/mu_w": 0.65355, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005710236220472421, "calib/std_conf": 0.06773103036588879, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5663284457478006, "calib/step_q_c_n": 1705.0, "calib/step_q_gap": -0.006469255401624685, "calib/step_q_w": 0.5727977011494253, "calib/step_q_w_n": 522.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 764.26953125, "completions/mean_terminated_length": 770.2874145507812, "completions/min_length": 0.0, "completions/min_terminated_length": 279.0, "epoch": 0.1408, "grad_norm": 0.27393683791160583, "learning_rate": 1.916666666666667e-06, "loss": 0.0111, "num_tokens": 35929601.0, "reward": 1.30078125, "reward_std": 0.21899119019508362, "rewards/accuracy_reward_step": 0.8046875, "rewards/format_reward_step": 0.9921875, "step": 132 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 11.84765625, "calib/ece": 0.12471544715447158, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.008130081300813009, "calib/gap": -0.01428610224726734, "calib/mean_conf": 0.6692682926829268, "calib/mu_c": 0.6632867132867133, "calib/mu_w": 0.6775728155339806, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10634146341463417, "calib/std_conf": 0.06447626060526396, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5788989053444945, "calib/step_q_c_n": 1553.0, "calib/step_q_gap": -0.022053797358208227, "calib/step_q_w": 0.6009527027027027, "calib/step_q_w_n": 1480.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2494.0, "completions/max_terminated_length": 2494.0, "completions/mean_length": 911.9453125, "completions/mean_terminated_length": 945.1741333007812, "completions/min_length": 0.0, "completions/min_terminated_length": 356.0, "epoch": 0.14186666666666667, "grad_norm": 0.6605852842330933, "learning_rate": 1.888888888888889e-06, "loss": -0.0547, "num_tokens": 36269403.0, "reward": 1.0390625, "reward_std": 0.33969852328300476, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.9609375, "step": 133 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 12.14453125, "calib/ece": 0.06972222222222227, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.00014432989690715825, "calib/mean_conf": 0.6719444444444443, "calib/mu_c": 0.6719999999999999, "calib/mu_w": 0.6718556701030928, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06329365079365083, "calib/std_conf": 0.07139614563821317, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5890830945558739, "calib/step_q_c_n": 1745.0, "calib/step_q_gap": -0.010902242687527819, "calib/step_q_w": 0.5999853372434018, "calib/step_q_w_n": 1364.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 906.65625, "completions/mean_terminated_length": 921.0476684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 355.0, "epoch": 0.14293333333333333, "grad_norm": 1.3199166059494019, "learning_rate": 1.8611111111111113e-06, "loss": -0.0304, "num_tokens": 36610459.0, "reward": 1.09765625, "reward_std": 0.30026212334632874, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.984375, "step": 134 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 10.6484375, "calib/ece": 0.075301204819277, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.016020104244229505, "calib/mean_conf": 0.651847389558233, "calib/mu_c": 0.6467647058823529, "calib/mu_w": 0.6627848101265824, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02220883534136544, "calib/std_conf": 0.05533215514313524, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5762599145820623, "calib/step_q_c_n": 1639.0, "calib/step_q_gap": -0.01798718753385309, "calib/step_q_w": 0.5942471021159154, "calib/step_q_w_n": 1087.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2484.0, "completions/max_terminated_length": 2484.0, "completions/mean_length": 779.2734375, "completions/mean_terminated_length": 801.1806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 339.0, "epoch": 0.144, "grad_norm": 22.018888473510742, "learning_rate": 1.8333333333333333e-06, "loss": -0.0541, "num_tokens": 36915833.0, "reward": 1.150390625, "reward_std": 0.17979279160499573, "rewards/accuracy_reward_step": 0.6640625, "rewards/format_reward_step": 0.97265625, "step": 135 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 11.046875, "calib/ece": 0.09971999999999998, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.027167981017516718, "calib/mean_conf": 0.6575599999999999, "calib/mu_c": 0.647888198757764, "calib/mu_w": 0.6750561797752808, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.056639999999999996, "calib/std_conf": 0.06205196531939985, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5717223974763407, "calib/step_q_c_n": 1585.0, "calib/step_q_gap": -0.04463319383500286, "calib/step_q_w": 0.6163555913113435, "calib/step_q_w_n": 1243.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2909.0, "completions/max_terminated_length": 2909.0, "completions/mean_length": 792.5234375, "completions/mean_terminated_length": 808.310791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.14506666666666668, "grad_norm": 8.103403091430664, "learning_rate": 1.8055555555555557e-06, "loss": -0.0398, "num_tokens": 37227207.0, "reward": 1.1171875, "reward_std": 0.19905364513397217, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.9765625, "step": 136 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 11.69921875, "calib/ece": 0.05221774193548386, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.008064516129032258, "calib/gap": 0.005273477812177574, "calib/mean_conf": 0.6664112903225806, "calib/mu_c": 0.6684313725490196, "calib/mu_w": 0.6631578947368421, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.050846774193548376, "calib/std_conf": 0.06967974113141867, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5858468957203135, "calib/step_q_c_n": 1659.0, "calib/step_q_gap": -0.026376158171902198, "calib/step_q_w": 0.6122230538922157, "calib/step_q_w_n": 1336.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 805.234375, "completions/mean_terminated_length": 827.8714599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 300.0, "epoch": 0.14613333333333334, "grad_norm": 6.265161514282227, "learning_rate": 1.777777777777778e-06, "loss": -0.0268, "num_tokens": 37540331.0, "reward": 1.08203125, "reward_std": 0.1679389476776123, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.96875, "step": 137 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 10.09375, "calib/ece": 0.10435483870967743, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.020161290322580645, "calib/gap": 0.02305802170208937, "calib/mean_conf": 0.6577419354838709, "calib/mu_c": 0.6632275132275132, "calib/mu_w": 0.6401694915254238, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.08042566904549314, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.598025641025641, "calib/step_q_c_n": 1950.0, "calib/step_q_gap": 0.00047043597832252537, "calib/step_q_w": 0.5975552050473185, "calib/step_q_w_n": 634.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 752.515625, "completions/mean_terminated_length": 776.790283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.1472, "grad_norm": 4.048952579498291, "learning_rate": 1.75e-06, "loss": -0.0587, "num_tokens": 37837311.0, "reward": 1.22265625, "reward_std": 0.16601943969726562, "rewards/accuracy_reward_step": 0.73828125, "rewards/format_reward_step": 0.96875, "step": 138 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.109375, "calib/ece": 0.08968253968253964, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.013015030946065376, "calib/mean_conf": 0.6362698412698412, "calib/mu_c": 0.6322413793103447, "calib/mu_w": 0.6452564102564101, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017738095238095233, "calib/std_conf": 0.0440730657226954, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5634498680738786, "calib/step_q_c_n": 1516.0, "calib/step_q_gap": -0.01664817114180761, "calib/step_q_w": 0.5800980392156863, "calib/step_q_w_n": 816.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2489.0, "completions/max_terminated_length": 2489.0, "completions/mean_length": 678.41015625, "completions/mean_terminated_length": 689.1785888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 278.0, "epoch": 0.14826666666666666, "grad_norm": 3.1196072101593018, "learning_rate": 1.7222222222222224e-06, "loss": -0.0342, "num_tokens": 38114080.0, "reward": 1.171875, "reward_std": 0.21596798300743103, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.984375, "step": 139 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 10.58203125, "calib/ece": 0.08480314960629928, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.018391330891330937, "calib/mean_conf": 0.6554330708661418, "calib/mu_c": 0.6502197802197802, "calib/mu_w": 0.6686111111111112, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011850393700787384, "calib/std_conf": 0.0541978862578768, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5736004331348132, "calib/step_q_c_n": 1847.0, "calib/step_q_gap": -0.02481023971901508, "calib/step_q_w": 0.5984106728538283, "calib/step_q_w_n": 862.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 797.0, "completions/mean_terminated_length": 803.2755737304688, "completions/min_length": 0.0, "completions/min_terminated_length": 268.0, "epoch": 0.14933333333333335, "grad_norm": 0.7780373096466064, "learning_rate": 1.6944444444444446e-06, "loss": -0.0067, "num_tokens": 38423128.0, "reward": 1.20703125, "reward_std": 0.18714842200279236, "rewards/accuracy_reward_step": 0.7109375, "rewards/format_reward_step": 0.9921875, "step": 140 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 13.09765625, "calib/ece": 0.11514056224899591, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": -0.014134290540540495, "calib/mean_conf": 0.6630923694779117, "calib/mu_c": 0.6594594594594595, "calib/mu_w": 0.67359375, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01763052208835339, "calib/std_conf": 0.06142611964552913, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5822583686940123, "calib/step_q_c_n": 2121.0, "calib/step_q_gap": -0.04230331961767608, "calib/step_q_w": 0.6245616883116883, "calib/step_q_w_n": 1232.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2920.0, "completions/max_terminated_length": 2920.0, "completions/mean_length": 835.42578125, "completions/mean_terminated_length": 852.0677490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 322.0, "epoch": 0.1504, "grad_norm": 1.050325870513916, "learning_rate": 1.6666666666666667e-06, "loss": -0.0231, "num_tokens": 38744093.0, "reward": 1.208984375, "reward_std": 0.14863857626914978, "rewards/accuracy_reward_step": 0.72265625, "rewards/format_reward_step": 0.97265625, "step": 141 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 11.23046875, "calib/ece": 0.10089843749999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.020271330500163387, "calib/mean_conf": 0.6598828125, "calib/mu_c": 0.652360248447205, "calib/mu_w": 0.6726315789473684, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06593750000000001, "calib/std_conf": 0.06411343963702029, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5813268236645606, "calib/step_q_c_n": 1741.0, "calib/step_q_gap": -0.0040788200744165115, "calib/step_q_w": 0.5854056437389771, "calib/step_q_w_n": 1134.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2251.0, "completions/max_terminated_length": 2251.0, "completions/mean_length": 833.3828125, "completions/mean_terminated_length": 839.9448852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.15146666666666667, "grad_norm": 0.21071504056453705, "learning_rate": 1.638888888888889e-06, "loss": 0.0005, "num_tokens": 39062599.0, "reward": 1.12890625, "reward_std": 0.17084911465644836, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 1.0, "step": 142 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 12.16015625, "calib/ece": 0.1629149797570851, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.012145748987854251, "calib/gap": -0.041809460062472215, "calib/mean_conf": 0.6544939271255061, "calib/mu_c": 0.6407831325301203, "calib/mu_w": 0.6825925925925925, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07267206477732792, "calib/std_conf": 0.06410112648434106, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5699192618223761, "calib/step_q_c_n": 1734.0, "calib/step_q_gap": -0.05538893252135124, "calib/step_q_w": 0.6253081943437273, "calib/step_q_w_n": 1379.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2411.0, "completions/max_terminated_length": 2411.0, "completions/mean_length": 781.33203125, "completions/mean_terminated_length": 809.8016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 296.0, "epoch": 0.15253333333333333, "grad_norm": 0.2845354378223419, "learning_rate": 1.6111111111111113e-06, "loss": -0.0763, "num_tokens": 39369956.0, "reward": 1.130859375, "reward_std": 0.22284802794456482, "rewards/accuracy_reward_step": 0.6484375, "rewards/format_reward_step": 0.96484375, "step": 143 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 10.984375, "calib/ece": 0.11803921568627448, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005136408730158748, "calib/mean_conf": 0.6484705882352939, "calib/mu_c": 0.6497395833333334, "calib/mu_w": 0.6446031746031746, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0067843137254901984, "calib/std_conf": 0.053089031008104394, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.577141472868217, "calib/step_q_c_n": 2064.0, "calib/step_q_gap": -0.005759596650499521, "calib/step_q_w": 0.5829010695187166, "calib/step_q_w_n": 748.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1968.0, "completions/max_terminated_length": 1968.0, "completions/mean_length": 760.26953125, "completions/mean_terminated_length": 766.2559204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1536, "grad_norm": 0.31746000051498413, "learning_rate": 1.5833333333333333e-06, "loss": -0.0168, "num_tokens": 39668713.0, "reward": 1.248046875, "reward_std": 0.1931958794593811, "rewards/accuracy_reward_step": 0.75, "rewards/format_reward_step": 0.99609375, "step": 144 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 11.6953125, "calib/ece": 0.11761904761904765, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": -0.00792102015631424, "calib/mean_conf": 0.663968253968254, "calib/mu_c": 0.6619251336898396, "calib/mu_w": 0.6698461538461539, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019761904761904737, "calib/std_conf": 0.06784762766295559, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5882366697848456, "calib/step_q_c_n": 2138.0, "calib/step_q_gap": -0.011109124607677767, "calib/step_q_w": 0.5993457943925233, "calib/step_q_w_n": 856.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2386.0, "completions/max_terminated_length": 2386.0, "completions/mean_length": 785.81640625, "completions/mean_terminated_length": 798.2897338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.15466666666666667, "grad_norm": 0.18704430758953094, "learning_rate": 1.5555555555555558e-06, "loss": -0.0404, "num_tokens": 39972586.0, "reward": 1.22265625, "reward_std": 0.16047047078609467, "rewards/accuracy_reward_step": 0.73046875, "rewards/format_reward_step": 0.984375, "step": 145 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 12.0390625, "calib/ece": 0.11702040816326532, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.0030535279805352644, "calib/mean_conf": 0.6599591836734694, "calib/mu_c": 0.6586131386861314, "calib/mu_w": 0.6616666666666666, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10889795918367348, "calib/std_conf": 0.0667343069326609, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5916322580645161, "calib/step_q_c_n": 1550.0, "calib/step_q_gap": -0.02187296386759885, "calib/step_q_w": 0.613505221932115, "calib/step_q_w_n": 1532.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2737.0, "completions/max_terminated_length": 2737.0, "completions/mean_length": 779.09765625, "completions/mean_terminated_length": 814.0775146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 279.0, "epoch": 0.15573333333333333, "grad_norm": 0.2517707347869873, "learning_rate": 1.527777777777778e-06, "loss": -0.064, "num_tokens": 40279251.0, "reward": 1.013671875, "reward_std": 0.20814210176467896, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.95703125, "step": 146 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 11.53515625, "calib/ece": 0.039439999999999954, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.003721590909090855, "calib/mean_conf": 0.6544800000000001, "calib/mu_c": 0.6559090909090909, "calib/mu_w": 0.6521875, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03895999999999995, "calib/std_conf": 0.05965844114624518, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5829066171923315, "calib/step_q_c_n": 1617.0, "calib/step_q_gap": -0.026127813945393097, "calib/step_q_w": 0.6090344311377246, "calib/step_q_w_n": 1336.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 786.4453125, "completions/mean_terminated_length": 798.9285888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 346.0, "epoch": 0.1568, "grad_norm": 0.27295610308647156, "learning_rate": 1.5e-06, "loss": 0.0041, "num_tokens": 40584261.0, "reward": 1.08984375, "reward_std": 0.14193546772003174, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.9765625, "step": 147 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 12.4453125, "calib/ece": 0.1605533596837945, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.020801169590643376, "calib/mean_conf": 0.656600790513834, "calib/mu_c": 0.6514210526315789, "calib/mu_w": 0.6722222222222223, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03308300395256916, "calib/std_conf": 0.06462652448769993, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5864745011086474, "calib/step_q_c_n": 2255.0, "calib/step_q_gap": -0.01205396290853844, "calib/step_q_w": 0.5985284640171858, "calib/step_q_w_n": 931.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2960.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 754.83984375, "completions/mean_terminated_length": 760.783447265625, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.15786666666666666, "grad_norm": 0.25415608286857605, "learning_rate": 1.4722222222222225e-06, "loss": 0.006, "num_tokens": 40882612.0, "reward": 1.236328125, "reward_std": 0.17967022955417633, "rewards/accuracy_reward_step": 0.7421875, "rewards/format_reward_step": 0.98828125, "step": 148 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 12.25390625, "calib/ece": 0.14382470119521903, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": -0.048063143752798854, "calib/mean_conf": 0.6673306772908366, "calib/mu_c": 0.6525862068965518, "calib/mu_w": 0.7006493506493506, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.058964143426294795, "calib/std_conf": 0.07435008054622753, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5768194217130387, "calib/step_q_c_n": 1833.0, "calib/step_q_gap": -0.04203418258143987, "calib/step_q_w": 0.6188536042944786, "calib/step_q_w_n": 1304.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2585.0, "completions/max_terminated_length": 2585.0, "completions/mean_length": 835.87890625, "completions/mean_terminated_length": 852.5299072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 281.0, "epoch": 0.15893333333333334, "grad_norm": 0.9107939600944519, "learning_rate": 1.4444444444444445e-06, "loss": -0.0662, "num_tokens": 41201053.0, "reward": 1.169921875, "reward_std": 0.20084260404109955, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.98046875, "step": 149 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 11.91015625, "calib/ece": 0.12536000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.02, "calib/gap": -0.04635688011410555, "calib/mean_conf": 0.64896, "calib/mu_c": 0.6346820809248556, "calib/mu_w": 0.6810389610389611, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04115999999999999, "calib/std_conf": 0.07070868687792187, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5711497476163769, "calib/step_q_c_n": 1783.0, "calib/step_q_gap": -0.05457695064586632, "calib/step_q_w": 0.6257266982622433, "calib/step_q_w_n": 1266.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2558.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 701.390625, "completions/mean_terminated_length": 721.1083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 243.0, "epoch": 0.16, "grad_norm": 0.2597368657588959, "learning_rate": 1.4166666666666667e-06, "loss": -0.069, "num_tokens": 41485569.0, "reward": 1.1640625, "reward_std": 0.1759205013513565, "rewards/accuracy_reward_step": 0.67578125, "rewards/format_reward_step": 0.9765625, "step": 150 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 14.2578125, "calib/ece": 0.10184647302904559, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.03319502074688797, "calib/gap": -0.020073888404533613, "calib/mean_conf": 0.670253112033195, "calib/mu_c": 0.6625067567567566, "calib/mu_w": 0.6825806451612902, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07899585062240659, "calib/std_conf": 0.1014594242664176, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6037099841521395, "calib/step_q_c_n": 1893.0, "calib/step_q_gap": -0.04592575859117287, "calib/step_q_w": 0.6496357427433124, "calib/step_q_w_n": 1757.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 826.16015625, "completions/mean_terminated_length": 873.9545288085938, "completions/min_length": 0.0, "completions/min_terminated_length": 308.0, "epoch": 0.16106666666666666, "grad_norm": 0.4585876762866974, "learning_rate": 1.3888888888888892e-06, "loss": -0.1048, "num_tokens": 41804090.0, "reward": 1.048828125, "reward_std": 0.20975670218467712, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.94140625, "step": 151 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 14.57421875, "calib/ece": 0.1368181818181818, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.012396694214876033, "calib/gap": -0.040026748804409484, "calib/mean_conf": 0.6561570247933884, "calib/mu_c": 0.6440828402366864, "calib/mu_w": 0.6841095890410959, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04731404958677685, "calib/std_conf": 0.07951105835709585, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5904891578416541, "calib/step_q_c_n": 1983.0, "calib/step_q_gap": -0.08152457213546249, "calib/step_q_w": 0.6720137299771166, "calib/step_q_w_n": 1748.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 767.5, "completions/mean_terminated_length": 811.9008178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 306.0, "epoch": 0.16213333333333332, "grad_norm": 0.5712304711341858, "learning_rate": 1.3611111111111112e-06, "loss": -0.0978, "num_tokens": 42105962.0, "reward": 1.130859375, "reward_std": 0.284695029258728, "rewards/accuracy_reward_step": 0.66015625, "rewards/format_reward_step": 0.94140625, "step": 152 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 14.12109375, "calib/ece": 0.06963265306122443, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0163265306122449, "calib/gap": -0.01282914493440801, "calib/mean_conf": 0.6707755102040817, "calib/mu_c": 0.6669005847953217, "calib/mu_w": 0.6797297297297297, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.021224489795918337, "calib/std_conf": 0.06483110405747476, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.591789375582479, "calib/step_q_c_n": 2146.0, "calib/step_q_gap": -0.05262178847470278, "calib/step_q_w": 0.6444111640571818, "calib/step_q_w_n": 1469.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2854.0, "completions/max_terminated_length": 2854.0, "completions/mean_length": 878.5859375, "completions/mean_terminated_length": 910.5992431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 342.0, "epoch": 0.1632, "grad_norm": 0.36940431594848633, "learning_rate": 1.3333333333333334e-06, "loss": -0.022, "num_tokens": 42438200.0, "reward": 1.146484375, "reward_std": 0.23738202452659607, "rewards/accuracy_reward_step": 0.66796875, "rewards/format_reward_step": 0.95703125, "step": 153 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 13.66796875, "calib/ece": 0.06636363636363632, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.008264462809917356, "calib/gap": -0.0007320205479453001, "calib/mean_conf": 0.660495867768595, "calib/mu_c": 0.6602054794520548, "calib/mu_w": 0.6609375000000001, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06177685950413221, "calib/std_conf": 0.060968102209736655, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5861912658927585, "calib/step_q_c_n": 1809.0, "calib/step_q_gap": -0.0726430536338688, "calib/step_q_w": 0.6588343195266273, "calib/step_q_w_n": 1690.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 780.71875, "completions/mean_terminated_length": 819.1146850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 346.0, "epoch": 0.16426666666666667, "grad_norm": 0.7874165773391724, "learning_rate": 1.3055555555555556e-06, "loss": -0.0603, "num_tokens": 42742504.0, "reward": 1.04296875, "reward_std": 0.20134729146957397, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.9453125, "step": 154 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 14.3828125, "calib/ece": 0.054918699186991944, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011882698768197142, "calib/mean_conf": 0.6622357723577236, "calib/mu_c": 0.6667763157894736, "calib/mu_w": 0.6548936170212765, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.049634146341463424, "calib/std_conf": 0.054945291534414825, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5891995997998999, "calib/step_q_c_n": 1999.0, "calib/step_q_gap": -0.05192339485250663, "calib/step_q_w": 0.6411229946524065, "calib/step_q_w_n": 1683.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 771.58984375, "completions/mean_terminated_length": 793.2810668945312, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.16533333333333333, "grad_norm": 0.4282694458961487, "learning_rate": 1.2777777777777779e-06, "loss": -0.0058, "num_tokens": 43047247.0, "reward": 1.07421875, "reward_std": 0.2485809326171875, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.9609375, "step": 155 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 12.7265625, "calib/ece": 0.08524556451612905, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.008064516129032258, "calib/gap": -0.01273744047619041, "calib/mean_conf": 0.6636326612903225, "calib/mu_c": 0.6595238095238095, "calib/mu_w": 0.6722612499999999, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03572943548387098, "calib/std_conf": 0.07622542294139767, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.584796833773087, "calib/step_q_c_n": 1895.0, "calib/step_q_gap": -0.03450778838391966, "calib/step_q_w": 0.6193046221570067, "calib/step_q_w_n": 1363.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 821.12109375, "completions/mean_terminated_length": 837.4780883789062, "completions/min_length": 0.0, "completions/min_terminated_length": 355.0, "epoch": 0.1664, "grad_norm": 0.3223394453525543, "learning_rate": 1.25e-06, "loss": -0.0287, "num_tokens": 43362214.0, "reward": 1.140625, "reward_std": 0.16987359523773193, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.96875, "step": 156 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 12.8671875, "calib/ece": 0.08476377952755902, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.012634310134310267, "calib/mean_conf": 0.6706692913385827, "calib/mu_c": 0.667087912087912, "calib/mu_w": 0.6797222222222222, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019448818897637794, "calib/std_conf": 0.06856189285212524, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5906270022883295, "calib/step_q_c_n": 2185.0, "calib/step_q_gap": -0.03149202386135497, "calib/step_q_w": 0.6221190261496845, "calib/step_q_w_n": 1109.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 817.640625, "completions/mean_terminated_length": 824.0787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.16746666666666668, "grad_norm": 0.7573793530464172, "learning_rate": 1.2222222222222223e-06, "loss": -0.023, "num_tokens": 43675258.0, "reward": 1.20703125, "reward_std": 0.2888256311416626, "rewards/accuracy_reward_step": 0.7109375, "rewards/format_reward_step": 0.9921875, "step": 157 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 12.45703125, "calib/ece": 0.1071653543307087, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.008466954022988582, "calib/mean_conf": 0.6615748031496064, "calib/mu_c": 0.6589080459770115, "calib/mu_w": 0.667375, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04185039370078742, "calib/std_conf": 0.067938429708527, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5890804044294655, "calib/step_q_c_n": 2077.0, "calib/step_q_gap": -0.018104847369095678, "calib/step_q_w": 0.6071852517985612, "calib/step_q_w_n": 1112.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 786.75, "completions/mean_terminated_length": 792.9448852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 301.0, "epoch": 0.16853333333333334, "grad_norm": 0.33515551686286926, "learning_rate": 1.1944444444444446e-06, "loss": -0.0143, "num_tokens": 43981906.0, "reward": 1.17578125, "reward_std": 0.18715086579322815, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.9921875, "step": 158 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 13.65625, "calib/ece": 0.09509599999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.013936188315371578, "calib/mean_conf": 0.6574159999999999, "calib/mu_c": 0.6526219512195122, "calib/mu_w": 0.6665581395348837, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04825599999999999, "calib/std_conf": 0.07192125516146114, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5841757105943153, "calib/step_q_c_n": 1935.0, "calib/step_q_gap": -0.04489924135956047, "calib/step_q_w": 0.6290749519538757, "calib/step_q_w_n": 1561.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 776.61328125, "completions/mean_terminated_length": 785.8221435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 262.0, "epoch": 0.1696, "grad_norm": 0.5414705872535706, "learning_rate": 1.1666666666666668e-06, "loss": -0.0035, "num_tokens": 44285503.0, "reward": 1.12890625, "reward_std": 0.18605078756809235, "rewards/accuracy_reward_step": 0.640625, "rewards/format_reward_step": 0.9765625, "step": 159 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 13.859375, "calib/ece": 0.1265587044534413, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.008097165991902834, "calib/gap": -0.03826023391812872, "calib/mean_conf": 0.6573279352226721, "calib/mu_c": 0.6455555555555554, "calib/mu_w": 0.6838157894736842, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04578947368421053, "calib/std_conf": 0.0572103313497274, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5782809330628803, "calib/step_q_c_n": 1972.0, "calib/step_q_gap": -0.0440436227746831, "calib/step_q_w": 0.6223245558375634, "calib/step_q_w_n": 1576.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 755.3125, "completions/mean_terminated_length": 779.6773681640625, "completions/min_length": 0.0, "completions/min_terminated_length": 313.0, "epoch": 0.17066666666666666, "grad_norm": 0.48375532031059265, "learning_rate": 1.138888888888889e-06, "loss": -0.0286, "num_tokens": 44583703.0, "reward": 1.150390625, "reward_std": 0.2401934415102005, "rewards/accuracy_reward_step": 0.66796875, "rewards/format_reward_step": 0.96484375, "step": 160 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 13.19921875, "calib/ece": 0.15460317460317463, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.010598170302603793, "calib/mean_conf": 0.655952380952381, "calib/mu_c": 0.6538916256157636, "calib/mu_w": 0.6644897959183674, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0024999999999999996, "calib/std_conf": 0.053186980317399546, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5826842105263158, "calib/step_q_c_n": 2470.0, "calib/step_q_gap": -0.0590759654912858, "calib/step_q_w": 0.6417601760176016, "calib/step_q_w_n": 909.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2037.0, "completions/max_terminated_length": 2037.0, "completions/mean_length": 740.15625, "completions/mean_terminated_length": 754.900390625, "completions/min_length": 0.0, "completions/min_terminated_length": 300.0, "epoch": 0.17173333333333332, "grad_norm": 0.218248188495636, "learning_rate": 1.111111111111111e-06, "loss": -0.0208, "num_tokens": 44877103.0, "reward": 1.28515625, "reward_std": 0.15109506249427795, "rewards/accuracy_reward_step": 0.79296875, "rewards/format_reward_step": 0.984375, "step": 161 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 11.36328125, "calib/ece": 0.14329411764705888, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.0014897733554450898, "calib/mean_conf": 0.6524705882352941, "calib/mu_c": 0.6527860696517412, "calib/mu_w": 0.6512962962962962, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003764705882352941, "calib/std_conf": 0.05308312125712916, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5773848238482384, "calib/step_q_c_n": 2214.0, "calib/step_q_gap": -0.01855762219492707, "calib/step_q_w": 0.5959424460431655, "calib/step_q_w_n": 695.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 757.2890625, "completions/mean_terminated_length": 760.2588500976562, "completions/min_length": 0.0, "completions/min_terminated_length": 311.0, "epoch": 0.1728, "grad_norm": 0.23682500422000885, "learning_rate": 1.0833333333333335e-06, "loss": 0.0092, "num_tokens": 45175113.0, "reward": 1.283203125, "reward_std": 0.15493015944957733, "rewards/accuracy_reward_step": 0.78515625, "rewards/format_reward_step": 0.99609375, "step": 162 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 16.49609375, "calib/ece": 0.08908713692946066, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.02074688796680498, "calib/gap": -0.029056327160493778, "calib/mean_conf": 0.6812033195020748, "calib/mu_c": 0.6714375, "calib/mu_w": 0.7004938271604938, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05319502074688799, "calib/std_conf": 0.07385505141617801, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5897835099032704, "calib/step_q_c_n": 2171.0, "calib/step_q_gap": -0.07958783512596934, "calib/step_q_w": 0.6693713450292398, "calib/step_q_w_n": 2052.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 876.32421875, "completions/mean_terminated_length": 915.6693725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 343.0, "epoch": 0.17386666666666667, "grad_norm": 0.3832489848136902, "learning_rate": 1.0555555555555557e-06, "loss": -0.0285, "num_tokens": 45504284.0, "reward": 1.095703125, "reward_std": 0.20130212604999542, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.94140625, "step": 163 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 15.13671875, "calib/ece": 0.2001639344262296, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.012295081967213115, "calib/gap": -0.04190548780487813, "calib/mean_conf": 0.6774590163934424, "calib/mu_c": 0.6637195121951219, "calib/mu_w": 0.7056250000000001, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10274590163934427, "calib/std_conf": 0.06997144002165918, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5873583093179635, "calib/step_q_c_n": 2082.0, "calib/step_q_gap": -0.050770692355210034, "calib/step_q_w": 0.6381290016731735, "calib/step_q_w_n": 1793.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 882.9140625, "completions/mean_terminated_length": 915.0850219726562, "completions/min_length": 0.0, "completions/min_terminated_length": 387.0, "epoch": 0.17493333333333333, "grad_norm": 0.20860423147678375, "learning_rate": 1.0277777777777777e-06, "loss": -0.07, "num_tokens": 45836446.0, "reward": 1.1171875, "reward_std": 0.23892061412334442, "rewards/accuracy_reward_step": 0.640625, "rewards/format_reward_step": 0.953125, "step": 164 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 14.03515625, "calib/ece": 0.13533596837944664, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": -0.02319058495114834, "calib/mean_conf": 0.668695652173913, "calib/mu_c": 0.6585211267605633, "calib/mu_w": 0.6817117117117116, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12138339920948618, "calib/std_conf": 0.06808125711257992, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5844966063348417, "calib/step_q_c_n": 1768.0, "calib/step_q_gap": -0.03118010599392551, "calib/step_q_w": 0.6156767123287672, "calib/step_q_w_n": 1825.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2477.0, "completions/max_terminated_length": 2477.0, "completions/mean_length": 856.7890625, "completions/mean_terminated_length": 870.388916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 330.0, "epoch": 0.176, "grad_norm": 0.20547796785831451, "learning_rate": 1.0000000000000002e-06, "loss": -0.0252, "num_tokens": 46161360.0, "reward": 1.048828125, "reward_std": 0.19807085394859314, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.98828125, "step": 165 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 14.03515625, "calib/ece": 0.17277551020408166, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0163265306122449, "calib/gap": -0.0472100122100122, "calib/mean_conf": 0.6703265306122449, "calib/mu_c": 0.6581868131868132, "calib/mu_w": 0.7053968253968254, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.050122448979591866, "calib/std_conf": 0.0650078135822061, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5863866666666667, "calib/step_q_c_n": 2250.0, "calib/step_q_gap": -0.05549717547778599, "calib/step_q_w": 0.6418838421444527, "calib/step_q_w_n": 1343.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2158.0, "completions/max_terminated_length": 2158.0, "completions/mean_length": 778.828125, "completions/mean_terminated_length": 817.131103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 307.0, "epoch": 0.17706666666666668, "grad_norm": 0.2849808633327484, "learning_rate": 9.722222222222224e-07, "loss": -0.0281, "num_tokens": 46466924.0, "reward": 1.189453125, "reward_std": 0.17397813498973846, "rewards/accuracy_reward_step": 0.7109375, "rewards/format_reward_step": 0.95703125, "step": 166 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 13.97265625, "calib/ece": 0.2165354330708662, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": -0.024596296296296782, "calib/mean_conf": 0.6769291338582677, "calib/mu_c": 0.6717, "calib/mu_w": 0.6962962962962967, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05303149606299212, "calib/std_conf": 0.07389087304750322, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5909087550794238, "calib/step_q_c_n": 2707.0, "calib/step_q_gap": -0.04283837135735791, "calib/step_q_w": 0.6337471264367817, "calib/step_q_w_n": 870.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2335.0, "completions/max_terminated_length": 2335.0, "completions/mean_length": 841.6796875, "completions/mean_terminated_length": 851.6600952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 316.0, "epoch": 0.17813333333333334, "grad_norm": 0.2347254455089569, "learning_rate": 9.444444444444445e-07, "loss": -0.007, "num_tokens": 46788002.0, "reward": 1.27734375, "reward_std": 0.09994982928037643, "rewards/accuracy_reward_step": 0.78125, "rewards/format_reward_step": 0.9921875, "step": 167 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 16.671875, "calib/ece": 0.14452282157676355, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.008298755186721992, "calib/gap": -0.04282077922077909, "calib/mean_conf": 0.6852697095435684, "calib/mu_c": 0.6735428571428572, "calib/mu_w": 0.7163636363636363, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05182572614107889, "calib/std_conf": 0.07231809254864334, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5977239413680782, "calib/step_q_c_n": 2456.0, "calib/step_q_gap": -0.05827666569593948, "calib/step_q_w": 0.6560006070640176, "calib/step_q_w_n": 1812.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 873.77734375, "completions/mean_terminated_length": 924.326416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 328.0, "epoch": 0.1792, "grad_norm": 0.23246829211711884, "learning_rate": 9.166666666666666e-07, "loss": -0.0611, "num_tokens": 47116361.0, "reward": 1.154296875, "reward_std": 0.25926464796066284, "rewards/accuracy_reward_step": 0.68359375, "rewards/format_reward_step": 0.94140625, "step": 168 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 12.6171875, "calib/ece": 0.07893700787401581, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005826411960132782, "calib/mean_conf": 0.6689370078740159, "calib/mu_c": 0.6669642857142858, "calib/mu_w": 0.6727906976744186, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04322834645669292, "calib/std_conf": 0.05315306200489301, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5849169921874999, "calib/step_q_c_n": 2048.0, "calib/step_q_gap": -0.007663380062923109, "calib/step_q_w": 0.592580372250423, "calib/step_q_w_n": 1182.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2639.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 846.12109375, "completions/mean_terminated_length": 852.783447265625, "completions/min_length": 0.0, "completions/min_terminated_length": 316.0, "epoch": 0.18026666666666666, "grad_norm": 0.30217358469963074, "learning_rate": 8.88888888888889e-07, "loss": 0.0012, "num_tokens": 47437152.0, "reward": 1.15234375, "reward_std": 0.17056959867477417, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.9921875, "step": 169 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 13.07421875, "calib/ece": 0.0821428571428572, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.0013406593406593004, "calib/mean_conf": 0.6703174603174604, "calib/mu_c": 0.669945054945055, "calib/mu_w": 0.6712857142857143, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.015119047619047627, "calib/std_conf": 0.05874259842516748, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5910180412371134, "calib/step_q_c_n": 2328.0, "calib/step_q_gap": -0.013663018625496903, "calib/step_q_w": 0.6046810598626103, "calib/step_q_w_n": 1019.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2513.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 826.29296875, "completions/mean_terminated_length": 832.7991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 355.0, "epoch": 0.18133333333333335, "grad_norm": 0.25517553091049194, "learning_rate": 8.611111111111112e-07, "loss": -0.0104, "num_tokens": 47752835.0, "reward": 1.20703125, "reward_std": 0.18111488223075867, "rewards/accuracy_reward_step": 0.71484375, "rewards/format_reward_step": 0.984375, "step": 170 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 13.5859375, "calib/ece": 0.06722222222222217, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010637990951412823, "calib/mean_conf": 0.6609523809523808, "calib/mu_c": 0.6566887417218544, "calib/mu_w": 0.6673267326732673, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06448412698412694, "calib/std_conf": 0.053257812176460355, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5830900372935536, "calib/step_q_c_n": 1877.0, "calib/step_q_gap": -0.013262242531555724, "calib/step_q_w": 0.5963522798251093, "calib/step_q_w_n": 1601.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1948.0, "completions/max_terminated_length": 1948.0, "completions/mean_length": 794.51171875, "completions/mean_terminated_length": 807.123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.1824, "grad_norm": 0.20195415616035461, "learning_rate": 8.333333333333333e-07, "loss": -0.0151, "num_tokens": 48063126.0, "reward": 1.08203125, "reward_std": 0.14921081066131592, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.984375, "step": 171 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 12.57421875, "calib/ece": 0.1586799999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02331443688586554, "calib/mean_conf": 0.65524, "calib/mu_c": 0.6502040816326531, "calib/mu_w": 0.6735185185185186, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014960000000000001, "calib/std_conf": 0.05578657903116126, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5797955555555555, "calib/step_q_c_n": 2250.0, "calib/step_q_gap": -0.038842215342277386, "calib/step_q_w": 0.6186377708978329, "calib/step_q_w_n": 969.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 767.859375, "completions/mean_terminated_length": 780.0476684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 361.0, "epoch": 0.18346666666666667, "grad_norm": 0.30305296182632446, "learning_rate": 8.055555555555557e-07, "loss": -0.0186, "num_tokens": 48363050.0, "reward": 1.25390625, "reward_std": 0.1857965886592865, "rewards/accuracy_reward_step": 0.765625, "rewards/format_reward_step": 0.9765625, "step": 172 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 13.6953125, "calib/ece": 0.07822134387351778, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03557312252964427, "calib/gap": 0.011060606060606215, "calib/mean_conf": 0.6734782608695652, "calib/mu_c": 0.6763636363636365, "calib/mu_w": 0.6653030303030303, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0062845849802371494, "calib/std_conf": 0.08751598680459782, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.602466456195738, "calib/step_q_c_n": 2534.0, "calib/step_q_gap": -0.00825370841331552, "calib/step_q_w": 0.6107201646090535, "calib/step_q_w_n": 972.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 839.54296875, "completions/mean_terminated_length": 849.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 276.0, "epoch": 0.18453333333333333, "grad_norm": 0.3388940393924713, "learning_rate": 7.777777777777779e-07, "loss": -0.0148, "num_tokens": 48681133.0, "reward": 1.22265625, "reward_std": 0.17308899760246277, "rewards/accuracy_reward_step": 0.73046875, "rewards/format_reward_step": 0.984375, "step": 173 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 15.359375, "calib/ece": 0.06844537815126057, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.008403361344537815, "calib/gap": 0.007239896180941785, "calib/mean_conf": 0.6717226890756303, "calib/mu_c": 0.674551724137931, "calib/mu_w": 0.6673118279569892, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06546218487394966, "calib/std_conf": 0.05931661583213658, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.5958893657606105, "calib/step_q_c_n": 2097.0, "calib/step_q_gap": -0.051729162849743826, "calib/step_q_w": 0.6476185286103543, "calib/step_q_w_n": 1835.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2882.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 860.96875, "completions/mean_terminated_length": 914.5560913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 369.0, "epoch": 0.1856, "grad_norm": 0.2765766978263855, "learning_rate": 7.5e-07, "loss": -0.0608, "num_tokens": 49005773.0, "reward": 1.029296875, "reward_std": 0.3664853870868683, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.92578125, "step": 174 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 14.53125, "calib/ece": 0.1834146341463415, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.012195121951219513, "calib/gap": -0.02279263771186446, "calib/mean_conf": 0.6778861788617886, "calib/mu_c": 0.6669531249999999, "calib/mu_w": 0.6897457627118644, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1704878048780488, "calib/std_conf": 0.07265300124195796, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5895738295318127, "calib/step_q_c_n": 1666.0, "calib/step_q_gap": -0.03702792314588932, "calib/step_q_w": 0.626601752677702, "calib/step_q_w_n": 2054.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2944.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 846.40625, "completions/mean_terminated_length": 877.2470092773438, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.18666666666666668, "grad_norm": 0.24171149730682373, "learning_rate": 7.222222222222222e-07, "loss": -0.062, "num_tokens": 49328277.0, "reward": 0.98046875, "reward_std": 0.25995826721191406, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.9609375, "step": 175 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 15.5625, "calib/ece": 0.08387999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": -0.016378672621955692, "calib/mean_conf": 0.6831600000000001, "calib/mu_c": 0.6773291925465837, "calib/mu_w": 0.6937078651685394, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06152000000000002, "calib/std_conf": 0.07869951969357881, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6036913043478261, "calib/step_q_c_n": 2300.0, "calib/step_q_gap": -0.02549189042652067, "calib/step_q_w": 0.6291831947743468, "calib/step_q_w_n": 1684.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 873.48046875, "completions/mean_terminated_length": 898.0361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 270.0, "epoch": 0.18773333333333334, "grad_norm": 0.20827040076255798, "learning_rate": 6.944444444444446e-07, "loss": -0.0165, "num_tokens": 49655952.0, "reward": 1.1171875, "reward_std": 0.2059280127286911, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.9765625, "step": 176 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 13.60546875, "calib/ece": 0.15513944223107565, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.03370410212277686, "calib/mean_conf": 0.6635856573705179, "calib/mu_c": 0.6524404761904763, "calib/mu_w": 0.6861445783132532, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0747011952191235, "calib/std_conf": 0.059284341111813736, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.583358208955224, "calib/step_q_c_n": 2010.0, "calib/step_q_gap": -0.034155097222644315, "calib/step_q_w": 0.6175133061778683, "calib/step_q_w_n": 1473.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2894.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 824.33984375, "completions/mean_terminated_length": 834.1146850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 336.0, "epoch": 0.1888, "grad_norm": 0.21935386955738068, "learning_rate": 6.666666666666667e-07, "loss": -0.0015, "num_tokens": 49970815.0, "reward": 1.146484375, "reward_std": 0.12562449276447296, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.98046875, "step": 177 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 13.89453125, "calib/ece": 0.060912698412698404, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.003250896319601848, "calib/mean_conf": 0.6672619047619048, "calib/mu_c": 0.6662427745664741, "calib/mu_w": 0.6694936708860759, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020833333333333322, "calib/std_conf": 0.05504726375729799, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5884535186794092, "calib/step_q_c_n": 2302.0, "calib/step_q_gap": -0.011267596858439366, "calib/step_q_w": 0.5997211155378486, "calib/step_q_w_n": 1255.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1764.0, "completions/max_terminated_length": 1764.0, "completions/mean_length": 773.0234375, "completions/mean_terminated_length": 785.293701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 291.0, "epoch": 0.18986666666666666, "grad_norm": 0.2755891978740692, "learning_rate": 6.388888888888889e-07, "loss": -0.0395, "num_tokens": 50274781.0, "reward": 1.16796875, "reward_std": 0.20019932091236115, "rewards/accuracy_reward_step": 0.67578125, "rewards/format_reward_step": 0.984375, "step": 178 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 13.35546875, "calib/ece": 0.11400793650793654, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": -0.0018976911853059164, "calib/mean_conf": 0.6651190476190476, "calib/mu_c": 0.6646596858638745, "calib/mu_w": 0.6665573770491804, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010595238095238095, "calib/std_conf": 0.060411828330779516, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5900040832993059, "calib/step_q_c_n": 2449.0, "calib/step_q_gap": -0.007501071339869325, "calib/step_q_w": 0.5975051546391752, "calib/step_q_w_n": 970.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2469.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 827.62890625, "completions/mean_terminated_length": 840.7659301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 276.0, "epoch": 0.19093333333333334, "grad_norm": 0.2493710219860077, "learning_rate": 6.111111111111112e-07, "loss": -0.0272, "num_tokens": 50592918.0, "reward": 1.23828125, "reward_std": 0.16652068495750427, "rewards/accuracy_reward_step": 0.74609375, "rewards/format_reward_step": 0.984375, "step": 179 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 17.390625, "calib/ece": 0.043724696356275315, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.020242914979757085, "calib/gap": -0.0009728308501313343, "calib/mean_conf": 0.7036437246963563, "calib/mu_c": 0.703312883435583, "calib/mu_w": 0.7042857142857143, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.043724696356275315, "calib/std_conf": 0.07650568626887001, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6103323262839879, "calib/step_q_c_n": 2648.0, "calib/step_q_gap": -0.026618893228207208, "calib/step_q_w": 0.6369512195121951, "calib/step_q_w_n": 1804.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2547.0, "completions/max_terminated_length": 2547.0, "completions/mean_length": 965.7421875, "completions/mean_terminated_length": 1004.9999389648438, "completions/min_length": 0.0, "completions/min_terminated_length": 390.0, "epoch": 0.192, "grad_norm": 0.2207210808992386, "learning_rate": 5.833333333333334e-07, "loss": -0.0376, "num_tokens": 50944004.0, "reward": 1.119140625, "reward_std": 0.2057252824306488, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.96484375, "step": 180 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 13.69140625, "calib/ece": 0.05204724409448826, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.007761627906976765, "calib/mean_conf": 0.6663779527559055, "calib/mu_c": 0.66375, "calib/mu_w": 0.6715116279069767, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.028503937007874056, "calib/std_conf": 0.05793247890729539, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5896074933095451, "calib/step_q_c_n": 2242.0, "calib/step_q_gap": -0.008777304790217322, "calib/step_q_w": 0.5983847980997624, "calib/step_q_w_n": 1263.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2532.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 820.90234375, "completions/mean_terminated_length": 824.1216430664062, "completions/min_length": 0.0, "completions/min_terminated_length": 363.0, "epoch": 0.19306666666666666, "grad_norm": 0.27567002177238464, "learning_rate": 5.555555555555555e-07, "loss": 0.0064, "num_tokens": 51260419.0, "reward": 1.15234375, "reward_std": 0.2041134089231491, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.9921875, "step": 181 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 14.12890625, "calib/ece": 0.1293700787401575, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.036599099099099086, "calib/mean_conf": 0.6714960629921259, "calib/mu_c": 0.6608333333333333, "calib/mu_w": 0.6974324324324324, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.046102362204724384, "calib/std_conf": 0.05996821651820224, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5833461210571185, "calib/step_q_c_n": 2346.0, "calib/step_q_gap": -0.03431713622061561, "calib/step_q_w": 0.6176632572777341, "calib/step_q_w_n": 1271.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2130.0, "completions/max_terminated_length": 2130.0, "completions/mean_length": 847.31640625, "completions/mean_terminated_length": 857.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 371.0, "epoch": 0.19413333333333332, "grad_norm": 0.2295883148908615, "learning_rate": 5.277777777777779e-07, "loss": -0.0192, "num_tokens": 51583492.0, "reward": 1.19921875, "reward_std": 0.19453556835651398, "rewards/accuracy_reward_step": 0.703125, "rewards/format_reward_step": 0.9921875, "step": 182 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 15.28515625, "calib/ece": 0.10800796812748993, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": -0.012878316288720715, "calib/mean_conf": 0.6839442231075696, "calib/mu_c": 0.6799421965317919, "calib/mu_w": 0.6928205128205126, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05135458167330675, "calib/std_conf": 0.07437375246865623, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6010756972111554, "calib/step_q_c_n": 2510.0, "calib/step_q_gap": -0.03647954156290023, "calib/step_q_w": 0.6375552387740556, "calib/step_q_w_n": 1403.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 914.36328125, "completions/mean_terminated_length": 928.8770141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 361.0, "epoch": 0.1952, "grad_norm": 0.2984082102775574, "learning_rate": 5.000000000000001e-07, "loss": -0.0115, "num_tokens": 51924249.0, "reward": 1.166015625, "reward_std": 0.258495569229126, "rewards/accuracy_reward_step": 0.67578125, "rewards/format_reward_step": 0.98046875, "step": 183 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 14.609375, "calib/ece": 0.07559523809523806, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": -0.014779980102548329, "calib/mean_conf": 0.6736111111111112, "calib/mu_c": 0.6693296089385475, "calib/mu_w": 0.6841095890410959, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019444444444444445, "calib/std_conf": 0.07324707830158014, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5995702347791484, "calib/step_q_c_n": 2513.0, "calib/step_q_gap": -0.01725128111327212, "calib/step_q_w": 0.6168215158924205, "calib/step_q_w_n": 1227.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 845.875, "completions/mean_terminated_length": 862.72509765625, "completions/min_length": 0.0, "completions/min_terminated_length": 292.0, "epoch": 0.19626666666666667, "grad_norm": 0.21070480346679688, "learning_rate": 4.7222222222222226e-07, "loss": -0.0355, "num_tokens": 52246073.0, "reward": 1.19140625, "reward_std": 0.193123459815979, "rewards/accuracy_reward_step": 0.69921875, "rewards/format_reward_step": 0.984375, "step": 184 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 14.80859375, "calib/ece": 0.17412244897959178, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.03939504827156637, "calib/mean_conf": 0.6626938775510205, "calib/mu_c": 0.6504733727810651, "calib/mu_w": 0.6898684210526315, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07351020408163263, "calib/std_conf": 0.056605402526337355, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5756875973015049, "calib/step_q_c_n": 1927.0, "calib/step_q_gap": -0.06673847565986846, "calib/step_q_w": 0.6424260729613733, "calib/step_q_w_n": 1864.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2965.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 818.75, "completions/mean_terminated_length": 848.5830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 380.0, "epoch": 0.19733333333333333, "grad_norm": 0.1729198396205902, "learning_rate": 4.444444444444445e-07, "loss": -0.0276, "num_tokens": 52562593.0, "reward": 1.138671875, "reward_std": 0.12429915368556976, "rewards/accuracy_reward_step": 0.66015625, "rewards/format_reward_step": 0.95703125, "step": 185 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 15.1953125, "calib/ece": 0.1281422924901186, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": -0.04608823529411754, "calib/mean_conf": 0.6743873517786562, "calib/mu_c": 0.662, "calib/mu_w": 0.7080882352941176, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.035652173913043476, "calib/std_conf": 0.06845067002867303, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5861823924200552, "calib/step_q_c_n": 2533.0, "calib/step_q_gap": -0.04835703278259762, "calib/step_q_w": 0.6345394252026528, "calib/step_q_w_n": 1357.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2769.0, "completions/max_terminated_length": 2769.0, "completions/mean_length": 894.87890625, "completions/mean_terminated_length": 901.9251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.1984, "grad_norm": 0.2917207181453705, "learning_rate": 4.1666666666666667e-07, "loss": -0.0176, "num_tokens": 52896722.0, "reward": 1.216796875, "reward_std": 0.1938859224319458, "rewards/accuracy_reward_step": 0.72265625, "rewards/format_reward_step": 0.98828125, "step": 186 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 16.36328125, "calib/ece": 0.09082644628099174, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.008264462809917356, "calib/gap": -0.01920346320346311, "calib/mean_conf": 0.6867768595041321, "calib/mu_c": 0.6806666666666666, "calib/mu_w": 0.6998701298701298, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04789256198347107, "calib/std_conf": 0.0702264916229942, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5987443392342529, "calib/step_q_c_n": 2429.0, "calib/step_q_gap": -0.05200003576574719, "calib/step_q_w": 0.650744375, "calib/step_q_w_n": 1760.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2662.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 917.1640625, "completions/mean_terminated_length": 958.3428344726562, "completions/min_length": 0.0, "completions/min_terminated_length": 338.0, "epoch": 0.19946666666666665, "grad_norm": 0.2757011353969574, "learning_rate": 3.8888888888888895e-07, "loss": -0.0491, "num_tokens": 53233060.0, "reward": 1.1171875, "reward_std": 0.3008151352405548, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.9453125, "step": 187 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 15.234375, "calib/ece": 0.08810355731225292, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": -0.00777241653418137, "calib/mean_conf": 0.6781430830039525, "calib/mu_c": 0.676054054054054, "calib/mu_w": 0.6838264705882354, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017510671936758898, "calib/std_conf": 0.0817776717111406, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5998210957882968, "calib/step_q_c_n": 2683.0, "calib/step_q_gap": -0.014320317523124682, "calib/step_q_w": 0.6141414133114215, "calib/step_q_w_n": 1217.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2373.0, "completions/max_terminated_length": 2373.0, "completions/mean_length": 909.578125, "completions/mean_terminated_length": 920.3636474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 341.0, "epoch": 0.20053333333333334, "grad_norm": 0.2577819526195526, "learning_rate": 3.611111111111111e-07, "loss": 0.0013, "num_tokens": 53569984.0, "reward": 1.216796875, "reward_std": 0.12154898792505264, "rewards/accuracy_reward_step": 0.72265625, "rewards/format_reward_step": 0.98828125, "step": 188 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 13.40625, "calib/ece": 0.10988142292490109, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02540476190476182, "calib/mean_conf": 0.6591304347826086, "calib/mu_c": 0.6505952380952381, "calib/mu_w": 0.6759999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.052490118577075084, "calib/std_conf": 0.05169875862043467, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5799900199600799, "calib/step_q_c_n": 2004.0, "calib/step_q_gap": -0.02961082037605456, "calib/step_q_w": 0.6096008403361345, "calib/step_q_w_n": 1428.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2418.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 806.65234375, "completions/mean_terminated_length": 816.2174072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 376.0, "epoch": 0.2016, "grad_norm": 0.2876899838447571, "learning_rate": 3.3333333333333335e-07, "loss": -0.0313, "num_tokens": 53884255.0, "reward": 1.150390625, "reward_std": 0.2516007423400879, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.98828125, "step": 189 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 13.890625, "calib/ece": 0.0443873517786561, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007500000000000062, "calib/mean_conf": 0.6751383399209486, "calib/mu_c": 0.6725, "calib/mu_w": 0.68, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03565217391304347, "calib/std_conf": 0.05668441721105008, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5904176334106728, "calib/step_q_c_n": 2155.0, "calib/step_q_gap": -0.017084151029013173, "calib/step_q_w": 0.6075017844396859, "calib/step_q_w_n": 1401.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2056.0, "completions/max_terminated_length": 2056.0, "completions/mean_length": 877.1640625, "completions/mean_terminated_length": 891.0873413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.20266666666666666, "grad_norm": 0.20597021281719208, "learning_rate": 3.055555555555556e-07, "loss": -0.0274, "num_tokens": 54214417.0, "reward": 1.134765625, "reward_std": 0.16795945167541504, "rewards/accuracy_reward_step": 0.640625, "rewards/format_reward_step": 0.98828125, "step": 190 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 14.15625, "calib/ece": 0.02862348178137656, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001968152866241968, "calib/mean_conf": 0.664251012145749, "calib/mu_c": 0.664968152866242, "calib/mu_w": 0.663, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02862348178137656, "calib/std_conf": 0.05040871162286916, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5865340364333653, "calib/step_q_c_n": 2086.0, "calib/step_q_gap": -0.024348798417089812, "calib/step_q_w": 0.6108828348504551, "calib/step_q_w_n": 1538.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2986.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 822.21875, "completions/mean_terminated_length": 848.7418823242188, "completions/min_length": 0.0, "completions/min_terminated_length": 276.0, "epoch": 0.20373333333333332, "grad_norm": 0.22264082729816437, "learning_rate": 2.7777777777777776e-07, "loss": -0.051, "num_tokens": 54529073.0, "reward": 1.095703125, "reward_std": 0.16742515563964844, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.96484375, "step": 191 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 14.57421875, "calib/ece": 0.12988047808764938, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": -0.032950000000000035, "calib/mean_conf": 0.6760956175298806, "calib/mu_c": 0.66625, "calib/mu_w": 0.6992, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05239043824701198, "calib/std_conf": 0.0739056603663103, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5940485312899106, "calib/step_q_c_n": 2349.0, "calib/step_q_gap": -0.03642180156103003, "calib/step_q_w": 0.6304703328509407, "calib/step_q_w_n": 1382.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 879.79296875, "completions/mean_terminated_length": 897.3187255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 324.0, "epoch": 0.2048, "grad_norm": 0.2249525636434555, "learning_rate": 2.5000000000000004e-07, "loss": -0.027, "num_tokens": 54859276.0, "reward": 1.177734375, "reward_std": 0.17005354166030884, "rewards/accuracy_reward_step": 0.6875, "rewards/format_reward_step": 0.98046875, "step": 192 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 15.44921875, "calib/ece": 0.11563265306122451, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": -0.02923701298701309, "calib/mean_conf": 0.6700816326530613, "calib/mu_c": 0.6608928571428572, "calib/mu_w": 0.6901298701298703, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05000000000000001, "calib/std_conf": 0.06241396911102182, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5874590909090909, "calib/step_q_c_n": 2200.0, "calib/step_q_gap": -0.062181934731934674, "calib/step_q_w": 0.6496410256410255, "calib/step_q_w_n": 1755.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2508.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 807.765625, "completions/mean_terminated_length": 840.6016235351562, "completions/min_length": 0.0, "completions/min_terminated_length": 380.0, "epoch": 0.20586666666666667, "grad_norm": 0.28256791830062866, "learning_rate": 2.2222222222222224e-07, "loss": -0.0349, "num_tokens": 55171776.0, "reward": 1.134765625, "reward_std": 0.21509431302547455, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.95703125, "step": 193 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 14.4375, "calib/ece": 0.09863281249999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": -0.015414866032843477, "calib/mean_conf": 0.6708203125, "calib/mu_c": 0.6661235955056181, "calib/mu_w": 0.6815384615384615, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.037070312499999994, "calib/std_conf": 0.06361283449432469, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5915540820555326, "calib/step_q_c_n": 2413.0, "calib/step_q_gap": -0.009856673984997455, "calib/step_q_w": 0.6014107560405301, "calib/step_q_w_n": 1283.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2169.0, "completions/max_terminated_length": 2169.0, "completions/mean_length": 830.1953125, "completions/mean_terminated_length": 836.7322998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 396.0, "epoch": 0.20693333333333333, "grad_norm": 0.7657372355461121, "learning_rate": 1.9444444444444447e-07, "loss": 0.0052, "num_tokens": 55490250.0, "reward": 1.1953125, "reward_std": 0.1434774398803711, "rewards/accuracy_reward_step": 0.6953125, "rewards/format_reward_step": 1.0, "step": 194 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 15.44140625, "calib/ece": 0.1362055335968379, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": -0.028996151804670967, "calib/mean_conf": 0.6756521739130434, "calib/mu_c": 0.6646496815286624, "calib/mu_w": 0.6936458333333334, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0956521739130435, "calib/std_conf": 0.0671679327575762, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5884119570294255, "calib/step_q_c_n": 2141.0, "calib/step_q_gap": -0.02956927917366514, "calib/step_q_w": 0.6179812362030906, "calib/step_q_w_n": 1812.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2339.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 861.03125, "completions/mean_terminated_length": 874.698486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 374.0, "epoch": 0.208, "grad_norm": 0.24327611923217773, "learning_rate": 1.6666666666666668e-07, "loss": -0.016, "num_tokens": 55816658.0, "reward": 1.107421875, "reward_std": 0.24357575178146362, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.98828125, "step": 195 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 12.58203125, "calib/ece": 0.11406249999999982, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01078947368421046, "calib/mean_conf": 0.655703125, "calib/mu_c": 0.6525, "calib/mu_w": 0.6632894736842104, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0333203125, "calib/std_conf": 0.044338604682989016, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5792476489028213, "calib/step_q_c_n": 2233.0, "calib/step_q_gap": -0.006470974578960065, "calib/step_q_w": 0.5857186234817814, "calib/step_q_w_n": 988.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2000.0, "completions/max_terminated_length": 2000.0, "completions/mean_length": 739.484375, "completions/mean_terminated_length": 745.3070678710938, "completions/min_length": 0.0, "completions/min_terminated_length": 368.0, "epoch": 0.20906666666666668, "grad_norm": 0.4479708671569824, "learning_rate": 1.3888888888888888e-07, "loss": 0.0002, "num_tokens": 56108510.0, "reward": 1.203125, "reward_std": 0.15190494060516357, "rewards/accuracy_reward_step": 0.703125, "rewards/format_reward_step": 1.0, "step": 196 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 14.3203125, "calib/ece": 0.05960784313725492, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.011764705882352941, "calib/gap": -0.014470895830466346, "calib/mean_conf": 0.6685490196078431, "calib/mu_c": 0.6636686390532545, "calib/mu_w": 0.6781395348837208, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03270588235294117, "calib/std_conf": 0.060803867822500295, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5914689507494646, "calib/step_q_c_n": 2335.0, "calib/step_q_gap": -0.020138862924464762, "calib/step_q_w": 0.6116078136739294, "calib/step_q_w_n": 1331.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2246.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 834.4453125, "completions/mean_terminated_length": 841.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 354.0, "epoch": 0.21013333333333334, "grad_norm": 0.24074199795722961, "learning_rate": 1.1111111111111112e-07, "loss": -0.018, "num_tokens": 56427184.0, "reward": 1.158203125, "reward_std": 0.19163835048675537, "rewards/accuracy_reward_step": 0.66015625, "rewards/format_reward_step": 0.99609375, "step": 197 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 15.453125, "calib/ece": 0.1080408163265306, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.022599967621823036, "calib/mean_conf": 0.6618367346938775, "calib/mu_c": 0.655287356321839, "calib/mu_w": 0.677887323943662, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.029836734693877567, "calib/std_conf": 0.06065201746018707, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5824627202892003, "calib/step_q_c_n": 2213.0, "calib/step_q_gap": -0.0915590812024808, "calib/step_q_w": 0.6740218014916811, "calib/step_q_w_n": 1743.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2851.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 781.96875, "completions/mean_terminated_length": 817.0775146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 306.0, "epoch": 0.2112, "grad_norm": 0.29830166697502136, "learning_rate": 8.333333333333334e-08, "loss": -0.0408, "num_tokens": 56732752.0, "reward": 1.158203125, "reward_std": 0.26918303966522217, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.95703125, "step": 198 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 14.7265625, "calib/ece": 0.2073279352226721, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.03895469769081594, "calib/mean_conf": 0.6691902834008097, "calib/mu_c": 0.6595698924731184, "calib/mu_w": 0.6985245901639343, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0617408906882591, "calib/std_conf": 0.07207840020445129, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5907238493723849, "calib/step_q_c_n": 2390.0, "calib/step_q_gap": -0.0657415129464557, "calib/step_q_w": 0.6564653623188406, "calib/step_q_w_n": 1380.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 811.703125, "completions/mean_terminated_length": 841.2793579101562, "completions/min_length": 0.0, "completions/min_terminated_length": 317.0, "epoch": 0.21226666666666666, "grad_norm": 0.3444276750087738, "learning_rate": 5.555555555555556e-08, "loss": -0.0413, "num_tokens": 57044748.0, "reward": 1.208984375, "reward_std": 0.21363122761249542, "rewards/accuracy_reward_step": 0.7265625, "rewards/format_reward_step": 0.96484375, "step": 199 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 14.8671875, "calib/ece": 0.20741035856573703, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": -0.06286187845303881, "calib/mean_conf": 0.6726693227091634, "calib/mu_c": 0.6551381215469613, "calib/mu_w": 0.7180000000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07948207171314742, "calib/std_conf": 0.07831674256807128, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5828472222222223, "calib/step_q_c_n": 2304.0, "calib/step_q_gap": -0.0629184235833703, "calib/step_q_w": 0.6457656458055926, "calib/step_q_w_n": 1502.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2474.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 855.87890625, "completions/mean_terminated_length": 872.9282836914062, "completions/min_length": 0.0, "completions/min_terminated_length": 334.0, "epoch": 0.21333333333333335, "grad_norm": 0.12666837871074677, "learning_rate": 2.777777777777778e-08, "loss": -0.0451, "num_tokens": 57371901.0, "reward": 1.197265625, "reward_std": 0.10271690785884857, "rewards/accuracy_reward_step": 0.70703125, "rewards/format_reward_step": 0.98046875, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": -0.012864412539638579, "train_runtime": 7417.8224, "train_samples_per_second": 6.902, "train_steps_per_second": 0.027 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 57371901, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }