{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.1015625, "calib/avg_num_step_conf": 0.42578125, "calib/ece": 0.8952941176470588, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9411764705882353, "calib/gap": 0.006249999999999978, "calib/mean_conf": 0.9541176470588235, "calib/mu_c": 0.96, "calib/mu_w": 0.95375, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.8952941176470588, "calib/std_conf": 0.044066637177242435, "calib/step_conf_rate": 0.09765625, "calib/step_q_c": 0.62, "calib/step_q_c_n": 4.0, "calib/step_q_gap": -0.12542857142857122, "calib/step_q_w": 0.7454285714285712, "calib/step_q_w_n": 105.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 622.23828125, "completions/mean_terminated_length": 674.9703369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.09340800344944, "learning_rate": 0.0, "loss": 0.0278, "num_tokens": 266877.0, "reward": 0.0234375, "reward_std": 0.04294103384017944, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.0390625, "step": 1 }, { "calib/answer_extract_rate": 0.1015625, "calib/avg_num_step_conf": 0.4375, "calib/ece": 0.6367055555555554, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": -0.05005833333333343, "calib/mean_conf": 0.8867055555555555, "calib/mu_c": 0.8533333333333334, "calib/mu_w": 0.9033916666666668, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.5950388888888888, "calib/std_conf": 0.27071644959301405, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.6966666666666667, "calib/step_q_c_n": 24.0, "calib/step_q_gap": -0.09584469696969677, "calib/step_q_w": 0.7925113636363634, "calib/step_q_w_n": 88.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.140625, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 608.22265625, "completions/mean_terminated_length": 707.75, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.0021333333333333334, "grad_norm": 0.1929834485054016, "learning_rate": 2.5000000000000004e-07, "loss": -0.0217, "num_tokens": 525870.0, "reward": 0.05078125, "reward_std": 0.10960911214351654, "rewards/accuracy_reward_step": 0.0234375, "rewards/format_reward_step": 0.0546875, "step": 2 }, { "calib/answer_extract_rate": 0.05859375, "calib/avg_num_step_conf": 0.30078125, "calib/ece": 0.851, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": 0.03222222222222204, "calib/mean_conf": 0.9509999999999998, "calib/mu_c": 0.98, "calib/mu_w": 0.9477777777777779, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.851, "calib/std_conf": 0.05356304696336829, "calib/step_conf_rate": 0.05859375, "calib/step_q_c": 0.8320000000000001, "calib/step_q_c_n": 10.0, "calib/step_q_gap": 5.970149253731183e-05, "calib/step_q_w": 0.8319402985074628, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 603.53515625, "completions/mean_terminated_length": 683.6504516601562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.10258053988218307, "learning_rate": 5.000000000000001e-07, "loss": 0.0015, "num_tokens": 785631.0, "reward": 0.021484375, "reward_std": 0.03884732350707054, "rewards/accuracy_reward_step": 0.0078125, "rewards/format_reward_step": 0.02734375, "step": 3 }, { "calib/answer_extract_rate": 0.04296875, "calib/avg_num_step_conf": 0.16015625, "calib/ece": 0.8185714285714286, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": -0.08833333333333337, "calib/mean_conf": 0.9157142857142857, "calib/mu_c": 0.84, "calib/mu_w": 0.9283333333333333, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.7957142857142857, "calib/std_conf": 0.08086269541762801, "calib/step_conf_rate": 0.0390625, "calib/step_q_c": 0.8725, "calib/step_q_c_n": 4.0, "calib/step_q_gap": 0.13087837837837846, "calib/step_q_w": 0.7416216216216216, "calib/step_q_w_n": 37.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 617.1484375, "completions/mean_terminated_length": 711.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.07202501595020294, "learning_rate": 7.5e-07, "loss": 0.0102, "num_tokens": 1049789.0, "reward": 0.015625, "reward_std": 0.04037860035896301, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.0234375, "step": 4 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.11328125, "calib/ece": 0.712857142857143, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": -0.07100000000000006, "calib/mean_conf": 0.9357142857142858, "calib/mu_c": 0.885, "calib/mu_w": 0.9560000000000001, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.6814285714285715, "calib/std_conf": 0.06477590885002646, "calib/step_conf_rate": 0.02734375, "calib/step_q_c": 0.95625, "calib/step_q_c_n": 8.0, "calib/step_q_gap": 0.24529761904761904, "calib/step_q_w": 0.710952380952381, "calib/step_q_w_n": 21.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 626.7421875, "completions/mean_terminated_length": 716.2767944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.10153383016586304, "learning_rate": 1.0000000000000002e-06, "loss": 0.009, "num_tokens": 1316923.0, "reward": 0.01953125, "reward_std": 0.04761157184839249, "rewards/accuracy_reward_step": 0.0078125, "rewards/format_reward_step": 0.0234375, "step": 5 }, { "calib/answer_extract_rate": 0.1015625, "calib/avg_num_step_conf": 0.49609375, "calib/ece": 0.73524, "calib/final_conf_rate": 0.08203125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.7619047619047619, "calib/gap": 0.11499777777777764, "calib/mean_conf": 0.878097142857143, "calib/mu_c": 0.9766666666666666, "calib/mu_w": 0.8616688888888889, "calib/nonempty_final_conf_rate": 0.08203125, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.73524, "calib/std_conf": 0.24146915049226425, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.7169999999999999, "calib/step_q_c_n": 10.0, "calib/step_q_gap": 0.008461025641025466, "calib/step_q_w": 0.7085389743589744, "calib/step_q_w_n": 117.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 527.4765625, "completions/mean_terminated_length": 577.0684204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.23361903429031372, "learning_rate": 1.25e-06, "loss": 0.0122, "num_tokens": 1557909.0, "reward": 0.044921875, "reward_std": 0.09989244490861893, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.06640625, "step": 6 }, { "calib/answer_extract_rate": 0.06640625, "calib/avg_num_step_conf": 0.25390625, "calib/ece": 0.77125, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.625, "calib/gap": 0.004285714285714226, "calib/mean_conf": 0.89625, "calib/mu_c": 0.9, "calib/mu_w": 0.8957142857142858, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.77125, "calib/std_conf": 0.13056966531319592, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.7416666666666667, "calib/step_q_c_n": 6.0, "calib/step_q_gap": -0.08680790960451967, "calib/step_q_w": 0.8284745762711864, "calib/step_q_w_n": 59.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13671875, "completions/max_length": 2886.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 673.59765625, "completions/mean_terminated_length": 780.2760620117188, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.007466666666666667, "grad_norm": 0.06771083176136017, "learning_rate": 1.5e-06, "loss": 0.0274, "num_tokens": 1837774.0, "reward": 0.01171875, "reward_std": 0.0293300561606884, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.015625, "step": 7 }, { "calib/answer_extract_rate": 0.125, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.5172222222222222, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.024249999999999994, "calib/mean_conf": 0.9027777777777778, "calib/mu_c": 0.91625, "calib/mu_w": 0.892, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.4877777777777778, "calib/std_conf": 0.14417603266514767, "calib/step_conf_rate": 0.07421875, "calib/step_q_c": 0.5926666666666667, "calib/step_q_c_n": 15.0, "calib/step_q_gap": -0.22578403755868548, "calib/step_q_w": 0.8184507042253522, "calib/step_q_w_n": 71.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2851.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 626.35546875, "completions/mean_terminated_length": 670.907958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.1453000009059906, "learning_rate": 1.75e-06, "loss": 0.0087, "num_tokens": 2104633.0, "reward": 0.0546875, "reward_std": 0.11970394104719162, "rewards/accuracy_reward_step": 0.03125, "rewards/format_reward_step": 0.046875, "step": 8 }, { "calib/answer_extract_rate": 0.09375, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.9231578947368422, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.8947368421052632, "calib/mean_conf": 0.9231578947368421, "calib/mu_c": NaN, "calib/mu_w": 0.9231578947368421, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.9231578947368422, "calib/std_conf": 0.1707892709430311, "calib/step_conf_rate": 0.06640625, "calib/step_q_w": 0.7977333333333333, "calib/step_q_w_n": 75.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3008.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 634.09375, "completions/mean_terminated_length": 708.8558959960938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.1522383689880371, "learning_rate": 2.0000000000000003e-06, "loss": 0.0123, "num_tokens": 2374497.0, "reward": 0.03125, "reward_std": 0.054752420634031296, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.0546875, "step": 9 }, { "calib/answer_extract_rate": 0.0859375, "calib/avg_num_step_conf": 0.3125, "calib/ece": 0.6352941176470588, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.5294117647058824, "calib/gap": 0.034047619047619104, "calib/mean_conf": 0.7352941176470589, "calib/mu_c": 0.7633333333333333, "calib/mu_w": 0.7292857142857142, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.5970588235294118, "calib/std_conf": 0.32093828703201643, "calib/step_conf_rate": 0.0859375, "calib/step_q_c": 0.7142857142857144, "calib/step_q_c_n": 7.0, "calib/step_q_gap": 0.00017612524461851908, "calib/step_q_w": 0.7141095890410959, "calib/step_q_w_n": 73.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 633.73828125, "completions/mean_terminated_length": 690.3701782226562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.2347153276205063, "learning_rate": 2.25e-06, "loss": 0.0364, "num_tokens": 2643534.0, "reward": 0.04296875, "reward_std": 0.10383903235197067, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.0625, "step": 10 }, { "calib/answer_extract_rate": 0.18359375, "calib/avg_num_step_conf": 0.73046875, "calib/ece": 0.6881818181818182, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/gap": -0.043194444444444424, "calib/mean_conf": 0.9003030303030304, "calib/mu_c": 0.8688888888888889, "calib/mu_w": 0.9120833333333334, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.2265625, "calib/nonempty_step_conf_rate": 0.15234375, "calib/pce": 0.657878787878788, "calib/std_conf": 0.20900173768285668, "calib/step_conf_rate": 0.15234375, "calib/step_q_c": 0.7881264705882352, "calib/step_q_c_n": 34.0, "calib/step_q_gap": 0.04878006535947699, "calib/step_q_w": 0.7393464052287582, "calib/step_q_w_n": 153.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 675.6875, "completions/mean_terminated_length": 723.7489624023438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.2045155018568039, "learning_rate": 2.5e-06, "loss": 0.0122, "num_tokens": 2920990.0, "reward": 0.0859375, "reward_std": 0.1808602213859558, "rewards/accuracy_reward_step": 0.03515625, "rewards/format_reward_step": 0.1015625, "step": 11 }, { "calib/answer_extract_rate": 0.20703125, "calib/avg_num_step_conf": 0.953125, "calib/ece": 0.5427272727272727, "calib/final_conf_rate": 0.171875, "calib/format_rate": 0.140625, "calib/frac_conf_gt_0.9": 0.6363636363636364, "calib/gap": -0.0040000000000000036, "calib/mean_conf": 0.8386363636363637, "calib/mu_c": 0.836, "calib/mu_w": 0.84, "calib/nonempty_final_conf_rate": 0.171875, "calib/nonempty_reasoning_rate": 0.2421875, "calib/nonempty_step_conf_rate": 0.1953125, "calib/pce": 0.5202272727272728, "calib/std_conf": 0.25541046206495027, "calib/step_conf_rate": 0.1953125, "calib/step_q_c": 0.8345333333333333, "calib/step_q_c_n": 75.0, "calib/step_q_gap": 0.07672268244575942, "calib/step_q_w": 0.7578106508875739, "calib/step_q_w_n": 169.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 581.84375, "completions/mean_terminated_length": 644.8138427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0128, "grad_norm": 0.2739364504814148, "learning_rate": 2.7500000000000004e-06, "loss": 0.0439, "num_tokens": 3174118.0, "reward": 0.12890625, "reward_std": 0.2127167135477066, "rewards/accuracy_reward_step": 0.05859375, "rewards/format_reward_step": 0.140625, "step": 12 }, { "calib/answer_extract_rate": 0.23828125, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.6739102564102565, "calib/final_conf_rate": 0.203125, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.7884615384615384, "calib/gap": 0.09339985218033986, "calib/mean_conf": 0.8854487179487178, "calib/mu_c": 0.959090909090909, "calib/mu_w": 0.8656910569105691, "calib/nonempty_final_conf_rate": 0.203125, "calib/nonempty_reasoning_rate": 0.2890625, "calib/nonempty_step_conf_rate": 0.21484375, "calib/pce": 0.6739102564102565, "calib/std_conf": 0.19547775011226887, "calib/step_conf_rate": 0.21484375, "calib/step_q_c": 0.7627272727272728, "calib/step_q_c_n": 44.0, "calib/step_q_gap": -0.017076666666666518, "calib/step_q_w": 0.7798039393939393, "calib/step_q_w_n": 220.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 591.5390625, "completions/mean_terminated_length": 615.5853271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.013866666666666666, "grad_norm": 0.2713400721549988, "learning_rate": 3e-06, "loss": 0.0977, "num_tokens": 3430144.0, "reward": 0.123046875, "reward_std": 0.24303366243839264, "rewards/accuracy_reward_step": 0.04296875, "rewards/format_reward_step": 0.16015625, "step": 13 }, { "calib/answer_extract_rate": 0.3125, "calib/avg_num_step_conf": 1.22265625, "calib/ece": 0.6165846153846155, "calib/final_conf_rate": 0.25390625, "calib/format_rate": 0.20703125, "calib/frac_conf_gt_0.9": 0.7846153846153846, "calib/gap": 0.057099290780141754, "calib/mean_conf": 0.8830461538461538, "calib/mu_c": 0.9243333333333332, "calib/mu_w": 0.8672340425531915, "calib/nonempty_final_conf_rate": 0.25390625, "calib/nonempty_reasoning_rate": 0.37890625, "calib/nonempty_step_conf_rate": 0.28515625, "calib/pce": 0.6113538461538462, "calib/std_conf": 0.22442819175504194, "calib/step_conf_rate": 0.28515625, "calib/step_q_c": 0.7554666666666666, "calib/step_q_c_n": 75.0, "calib/step_q_gap": 0.022105322128851412, "calib/step_q_w": 0.7333613445378152, "calib/step_q_w_n": 238.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 521.171875, "completions/mean_terminated_length": 565.3389892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.014933333333333333, "grad_norm": 0.32171332836151123, "learning_rate": 3.2500000000000002e-06, "loss": 0.0173, "num_tokens": 3668964.0, "reward": 0.177734375, "reward_std": 0.24229903519153595, "rewards/accuracy_reward_step": 0.07421875, "rewards/format_reward_step": 0.20703125, "step": 14 }, { "calib/answer_extract_rate": 0.51953125, "calib/avg_num_step_conf": 2.54296875, "calib/ece": 0.6969990909090908, "calib/final_conf_rate": 0.4296875, "calib/format_rate": 0.35546875, "calib/frac_conf_gt_0.9": 0.7181818181818181, "calib/gap": 0.0034599999999999076, "calib/mean_conf": 0.9077263636363636, "calib/mu_c": 0.9103999999999999, "calib/mu_w": 0.90694, "calib/nonempty_final_conf_rate": 0.4296875, "calib/nonempty_reasoning_rate": 0.59375, "calib/nonempty_step_conf_rate": 0.48046875, "calib/pce": 0.6887263636363635, "calib/std_conf": 0.14853568207812298, "calib/step_conf_rate": 0.48046875, "calib/step_q_c": 0.718747663551402, "calib/step_q_c_n": 107.0, "calib/step_q_gap": -0.039332912428990285, "calib/step_q_w": 0.7580805759803922, "calib/step_q_w_n": 544.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 514.18359375, "completions/mean_terminated_length": 524.4263305664062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.016, "grad_norm": 0.39926886558532715, "learning_rate": 3.5e-06, "loss": 0.1415, "num_tokens": 3908475.0, "reward": 0.279296875, "reward_std": 0.3604772984981537, "rewards/accuracy_reward_step": 0.1015625, "rewards/format_reward_step": 0.35546875, "step": 15 }, { "calib/answer_extract_rate": 0.57421875, "calib/avg_num_step_conf": 3.03125, "calib/ece": 0.7074580152671756, "calib/final_conf_rate": 0.51171875, "calib/format_rate": 0.43359375, "calib/frac_conf_gt_0.9": 0.7786259541984732, "calib/gap": -0.07769666048237456, "calib/mean_conf": 0.9153969465648855, "calib/mu_c": 0.8572727272727273, "calib/mu_w": 0.9349693877551019, "calib/nonempty_final_conf_rate": 0.51171875, "calib/nonempty_reasoning_rate": 0.65625, "calib/nonempty_step_conf_rate": 0.55859375, "calib/pce": 0.6854732824427481, "calib/std_conf": 0.16302843284022775, "calib/step_conf_rate": 0.55859375, "calib/step_q_c": 0.756219512195122, "calib/step_q_c_n": 164.0, "calib/step_q_gap": -0.027582884319039325, "calib/step_q_w": 0.7838023965141613, "calib/step_q_w_n": 612.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 515.140625, "completions/mean_terminated_length": 531.758056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.3101026713848114, "learning_rate": 3.7500000000000005e-06, "loss": 0.0761, "num_tokens": 4149199.0, "reward": 0.353515625, "reward_std": 0.3571109175682068, "rewards/accuracy_reward_step": 0.13671875, "rewards/format_reward_step": 0.43359375, "step": 16 }, { "calib/answer_extract_rate": 0.703125, "calib/avg_num_step_conf": 3.8828125, "calib/ece": 0.6893592814371259, "calib/final_conf_rate": 0.65234375, "calib/format_rate": 0.58984375, "calib/frac_conf_gt_0.9": 0.7964071856287425, "calib/gap": -0.00263681102362201, "calib/mean_conf": 0.928880239520958, "calib/mu_c": 0.9268750000000001, "calib/mu_w": 0.9295118110236221, "calib/nonempty_final_conf_rate": 0.65234375, "calib/nonempty_reasoning_rate": 0.7734375, "calib/nonempty_step_conf_rate": 0.70703125, "calib/pce": 0.6893592814371259, "calib/std_conf": 0.10589311085660263, "calib/step_conf_rate": 0.70703125, "calib/step_q_c": 0.7367592592592592, "calib/step_q_c_n": 216.0, "calib/step_q_gap": -0.0004886841854707669, "calib/step_q_w": 0.73724794344473, "calib/step_q_w_n": 778.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 452.05859375, "completions/mean_terminated_length": 464.7670593261719, "completions/min_length": 0.0, "completions/min_terminated_length": 34.0, "epoch": 0.018133333333333335, "grad_norm": 0.3115542232990265, "learning_rate": 4.000000000000001e-06, "loss": 0.1478, "num_tokens": 4368454.0, "reward": 0.455078125, "reward_std": 0.4204365015029907, "rewards/accuracy_reward_step": 0.16015625, "rewards/format_reward_step": 0.58984375, "step": 17 }, { "calib/answer_extract_rate": 0.73828125, "calib/avg_num_step_conf": 4.1484375, "calib/ece": 0.637949438202247, "calib/final_conf_rate": 0.6953125, "calib/format_rate": 0.61328125, "calib/frac_conf_gt_0.9": 0.7078651685393258, "calib/gap": 0.030807852965747617, "calib/mean_conf": 0.8907584269662921, "calib/mu_c": 0.9137777777777777, "calib/mu_w": 0.8829699248120301, "calib/nonempty_final_conf_rate": 0.6953125, "calib/nonempty_reasoning_rate": 0.83203125, "calib/nonempty_step_conf_rate": 0.7421875, "calib/pce": 0.637949438202247, "calib/std_conf": 0.19176036386095893, "calib/step_conf_rate": 0.7421875, "calib/step_q_c": 0.7423841807909604, "calib/step_q_c_n": 177.0, "calib/step_q_gap": -0.010902937853107364, "calib/step_q_w": 0.7532871186440677, "calib/step_q_w_n": 885.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2624.0, "completions/max_terminated_length": 2624.0, "completions/mean_length": 407.09375, "completions/mean_terminated_length": 410.2992248535156, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.0192, "grad_norm": 0.3702068030834198, "learning_rate": 4.25e-06, "loss": 0.1734, "num_tokens": 4583390.0, "reward": 0.482421875, "reward_std": 0.42351776361465454, "rewards/accuracy_reward_step": 0.17578125, "rewards/format_reward_step": 0.61328125, "step": 18 }, { "calib/answer_extract_rate": 0.91796875, "calib/avg_num_step_conf": 4.828125, "calib/ece": 0.6976190476190478, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.8359375, "calib/frac_conf_gt_0.9": 0.7532467532467533, "calib/gap": 0.02320110497237582, "calib/mean_conf": 0.9092207792207792, "calib/mu_c": 0.9274, "calib/mu_w": 0.9041988950276242, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.6951948051948054, "calib/std_conf": 0.1566126793817019, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.8100411985018727, "calib/step_q_c_n": 267.0, "calib/step_q_gap": 0.08050163238617014, "calib/step_q_w": 0.7295395661157026, "calib/step_q_w_n": 968.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1001.0, "completions/max_terminated_length": 1001.0, "completions/mean_length": 291.68359375, "completions/mean_terminated_length": 291.68359375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.020266666666666665, "grad_norm": 0.3535246253013611, "learning_rate": 4.5e-06, "loss": 0.0078, "num_tokens": 4762821.0, "reward": 0.6171875, "reward_std": 0.372276246547699, "rewards/accuracy_reward_step": 0.19921875, "rewards/format_reward_step": 0.8359375, "step": 19 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 5.06640625, "calib/ece": 0.5800403225806453, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.7903225806451613, "calib/gap": 0.023405923344947843, "calib/mean_conf": 0.9123790322580645, "calib/mu_c": 0.927857142857143, "calib/mu_w": 0.9044512195121952, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.5768548387096776, "calib/std_conf": 0.16562803705027718, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.760523560209424, "calib/step_q_c_n": 382.0, "calib/step_q_gap": 0.008751975509970533, "calib/step_q_w": 0.7517715846994535, "calib/step_q_w_n": 915.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2428.0, "completions/max_terminated_length": 2428.0, "completions/mean_length": 289.0859375, "completions/mean_terminated_length": 289.0859375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.021333333333333333, "grad_norm": 0.27600613236427307, "learning_rate": 4.75e-06, "loss": 0.0353, "num_tokens": 4941699.0, "reward": 0.80078125, "reward_std": 0.3837167024612427, "rewards/accuracy_reward_step": 0.33203125, "rewards/format_reward_step": 0.9375, "step": 20 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 4.96875, "calib/ece": 0.6792874829931973, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.8367346938775511, "calib/gap": 0.017045490334332225, "calib/mean_conf": 0.9323487074829931, "calib/mu_c": 0.9450806451612903, "calib/mu_w": 0.9280351548269581, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.6792874829931973, "calib/std_conf": 0.1318469984727218, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.8108069620253165, "calib/step_q_c_n": 316.0, "calib/step_q_gap": 0.015945107074131104, "calib/step_q_w": 0.7948618549511854, "calib/step_q_w_n": 956.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 292.47265625, "completions/mean_terminated_length": 293.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.0224, "grad_norm": 0.262796014547348, "learning_rate": 5e-06, "loss": 0.0414, "num_tokens": 5119532.0, "reward": 0.703125, "reward_std": 0.37573766708374023, "rewards/accuracy_reward_step": 0.2421875, "rewards/format_reward_step": 0.921875, "step": 21 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 4.97265625, "calib/ece": 0.6300711462450593, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.7865612648221344, "calib/gap": -0.0007543589743589374, "calib/mean_conf": 0.9320474308300395, "calib/mu_c": 0.9315256410256411, "calib/mu_w": 0.93228, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.626909090909091, "calib/std_conf": 0.10416734466703273, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8036150127226462, "calib/step_q_c_n": 393.0, "calib/step_q_gap": 0.004382058177191617, "calib/step_q_w": 0.7992329545454546, "calib/step_q_w_n": 880.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 753.0, "completions/max_terminated_length": 753.0, "completions/mean_length": 262.359375, "completions/mean_terminated_length": 263.38824462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.023466666666666667, "grad_norm": 0.23157238960266113, "learning_rate": 4.9722222222222224e-06, "loss": -0.0086, "num_tokens": 5288512.0, "reward": 0.783203125, "reward_std": 0.3809746205806732, "rewards/accuracy_reward_step": 0.3046875, "rewards/format_reward_step": 0.95703125, "step": 22 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 4.80859375, "calib/ece": 0.6420756097560976, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.8414634146341463, "calib/gap": 0.016442911877394772, "calib/mean_conf": 0.9347585365853659, "calib/mu_c": 0.946388888888889, "calib/mu_w": 0.9299459770114942, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.6420756097560976, "calib/std_conf": 0.12590850202592063, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.8159972899728999, "calib/step_q_c_n": 369.0, "calib/step_q_gap": 0.012226988348770007, "calib/step_q_w": 0.8037703016241299, "calib/step_q_w_n": 862.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1923.0, "completions/max_terminated_length": 1923.0, "completions/mean_length": 278.5, "completions/mean_terminated_length": 279.5921630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.024533333333333334, "grad_norm": 0.22491341829299927, "learning_rate": 4.944444444444445e-06, "loss": 0.0016, "num_tokens": 5463744.0, "reward": 0.755859375, "reward_std": 0.3748607039451599, "rewards/accuracy_reward_step": 0.28515625, "rewards/format_reward_step": 0.94140625, "step": 23 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 5.41796875, "calib/ece": 0.7062290836653384, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.8326693227091634, "calib/gap": -0.008835951134380515, "calib/mean_conf": 0.9408904382470119, "calib/mu_c": 0.9341666666666666, "calib/mu_w": 0.9430026178010471, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.7040378486055775, "calib/std_conf": 0.11581519894633541, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.782109090909091, "calib/step_q_c_n": 275.0, "calib/step_q_gap": -0.019002779594506136, "calib/step_q_w": 0.8011118705035971, "calib/step_q_w_n": 1112.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2370.0, "completions/max_terminated_length": 2370.0, "completions/mean_length": 296.42578125, "completions/mean_terminated_length": 296.42578125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 0.0256, "grad_norm": 0.17748171091079712, "learning_rate": 4.9166666666666665e-06, "loss": 0.0558, "num_tokens": 5644141.0, "reward": 0.71875, "reward_std": 0.30310535430908203, "rewards/accuracy_reward_step": 0.234375, "rewards/format_reward_step": 0.96875, "step": 24 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.37109375, "calib/ece": 0.66048031496063, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8503937007874016, "calib/gap": -0.004199099099099213, "calib/mean_conf": 0.9465433070866143, "calib/mu_c": 0.9435675675675675, "calib/mu_w": 0.9477666666666668, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6578425196850394, "calib/std_conf": 0.07255448736161858, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8115143222506394, "calib/step_q_c_n": 391.0, "calib/step_q_gap": -0.008937913521718466, "calib/step_q_w": 0.8204522357723578, "calib/step_q_w_n": 984.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 772.0, "completions/max_terminated_length": 772.0, "completions/mean_length": 279.05859375, "completions/mean_terminated_length": 280.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.02666666666666667, "grad_norm": 0.2226199209690094, "learning_rate": 4.888888888888889e-06, "loss": 0.0067, "num_tokens": 5818804.0, "reward": 0.783203125, "reward_std": 0.3985111117362976, "rewards/accuracy_reward_step": 0.296875, "rewards/format_reward_step": 0.97265625, "step": 25 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.4765625, "calib/ece": 0.6369146825396825, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9206349206349206, "calib/gap": 0.00606420373027261, "calib/mean_conf": 0.9623115079365079, "calib/mu_c": 0.9664024390243903, "calib/mu_w": 0.9603382352941177, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6369146825396825, "calib/std_conf": 0.03689188390047501, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8371509846827133, "calib/step_q_c_n": 457.0, "calib/step_q_gap": 0.008553173963136707, "calib/step_q_w": 0.8285978107195766, "calib/step_q_w_n": 945.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2280.0, "completions/max_terminated_length": 2280.0, "completions/mean_length": 311.203125, "completions/mean_terminated_length": 311.203125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.027733333333333332, "grad_norm": 0.18909384310245514, "learning_rate": 4.861111111111111e-06, "loss": 0.0547, "num_tokens": 6003712.0, "reward": 0.8125, "reward_std": 0.3554888665676117, "rewards/accuracy_reward_step": 0.3203125, "rewards/format_reward_step": 0.984375, "step": 26 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 5.875, "calib/ece": 0.6326521739130435, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8893280632411067, "calib/gap": -0.0024731396172925635, "calib/mean_conf": 0.9575533596837943, "calib/mu_c": 0.9558915662650602, "calib/mu_w": 0.9583647058823528, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6310711462450593, "calib/std_conf": 0.043072717704501245, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8300023752969121, "calib/step_q_c_n": 421.0, "calib/step_q_gap": 0.001743649535139169, "calib/step_q_w": 0.828258725761773, "calib/step_q_w_n": 1083.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2382.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 310.98046875, "completions/mean_terminated_length": 310.98046875, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.0288, "grad_norm": 0.2512206733226776, "learning_rate": 4.833333333333333e-06, "loss": 0.0554, "num_tokens": 6188539.0, "reward": 0.81640625, "reward_std": 0.3243214786052704, "rewards/accuracy_reward_step": 0.32421875, "rewards/format_reward_step": 0.984375, "step": 27 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.72265625, "calib/ece": 0.5212460317460319, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9404761904761905, "calib/gap": -0.005501785714285834, "calib/mean_conf": 0.9656904761904762, "calib/mu_c": 0.9626339285714286, "calib/mu_w": 0.9681357142857144, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.5212460317460319, "calib/std_conf": 0.03520787797072999, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8592661157024793, "calib/step_q_c_n": 605.0, "calib/step_q_gap": 0.007533557562944493, "calib/step_q_w": 0.8517325581395349, "calib/step_q_w_n": 860.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2449.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 343.22265625, "completions/mean_terminated_length": 343.22265625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.029866666666666666, "grad_norm": 0.18749113380908966, "learning_rate": 4.805555555555556e-06, "loss": 0.0048, "num_tokens": 6383348.0, "reward": 0.931640625, "reward_std": 0.3386801481246948, "rewards/accuracy_reward_step": 0.44140625, "rewards/format_reward_step": 0.98046875, "step": 28 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.50390625, "calib/ece": 0.6446484, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.924, "calib/gap": 0.009045384558442726, "calib/mean_conf": 0.9606484000000001, "calib/mu_c": 0.9668354430379748, "calib/mu_w": 0.957790058479532, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6446484, "calib/std_conf": 0.06880537491678976, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8503131524008349, "calib/step_q_c_n": 479.0, "calib/step_q_gap": 0.004799661675708444, "calib/step_q_w": 0.8455134907251265, "calib/step_q_w_n": 1186.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2479.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 402.20703125, "completions/mean_terminated_length": 403.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.030933333333333334, "grad_norm": 0.18767628073692322, "learning_rate": 4.777777777777778e-06, "loss": 0.0569, "num_tokens": 6593441.0, "reward": 0.796875, "reward_std": 0.32658594846725464, "rewards/accuracy_reward_step": 0.30859375, "rewards/format_reward_step": 0.9765625, "step": 29 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.53125, "calib/ece": 0.6063754940711462, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9051383399209486, "calib/gap": -0.000441581458759166, "calib/mean_conf": 0.962106719367589, "calib/mu_c": 0.9618222222222224, "calib/mu_w": 0.9622638036809815, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6063754940711462, "calib/std_conf": 0.0342245617471847, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8520050966608084, "calib/step_q_c_n": 569.0, "calib/step_q_gap": 0.007608723134063022, "calib/step_q_w": 0.8443963735267453, "calib/step_q_w_n": 1103.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2346.0, "completions/max_terminated_length": 2346.0, "completions/mean_length": 406.7890625, "completions/mean_terminated_length": 406.7890625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.032, "grad_norm": 0.18368232250213623, "learning_rate": 4.75e-06, "loss": 0.0472, "num_tokens": 6804563.0, "reward": 0.841796875, "reward_std": 0.37309718132019043, "rewards/accuracy_reward_step": 0.3515625, "rewards/format_reward_step": 0.98046875, "step": 30 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.34765625, "calib/ece": 0.5997330677290836, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9322709163346613, "calib/gap": -0.001713043478260956, "calib/mean_conf": 0.9582988047808766, "calib/mu_c": 0.9571999999999998, "calib/mu_w": 0.9589130434782608, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5997330677290836, "calib/std_conf": 0.067515457567313, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8475205128205129, "calib/step_q_c_n": 585.0, "calib/step_q_gap": -0.014571693969610666, "calib/step_q_w": 0.8620922067901235, "calib/step_q_w_n": 1296.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2306.0, "completions/max_terminated_length": 2306.0, "completions/mean_length": 436.0078125, "completions/mean_terminated_length": 436.0078125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.03306666666666667, "grad_norm": 0.14242278039455414, "learning_rate": 4.722222222222222e-06, "loss": 0.0464, "num_tokens": 7022093.0, "reward": 0.845703125, "reward_std": 0.2595009207725525, "rewards/accuracy_reward_step": 0.35546875, "rewards/format_reward_step": 0.98046875, "step": 31 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.703125, "calib/ece": 0.5951372549019607, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9019607843137255, "calib/gap": 0.0041697786076286825, "calib/mean_conf": 0.955921568627451, "calib/mu_c": 0.9585869565217391, "calib/mu_w": 0.9544171779141104, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5951372549019607, "calib/std_conf": 0.06854677611081697, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.848257042253521, "calib/step_q_c_n": 568.0, "calib/step_q_gap": 0.009546240859792943, "calib/step_q_w": 0.8387108013937281, "calib/step_q_w_n": 1148.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2281.0, "completions/max_terminated_length": 2281.0, "completions/mean_length": 395.75, "completions/mean_terminated_length": 395.75, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.034133333333333335, "grad_norm": 0.16663441061973572, "learning_rate": 4.694444444444445e-06, "loss": -0.0072, "num_tokens": 7230109.0, "reward": 0.85546875, "reward_std": 0.19465388357639313, "rewards/accuracy_reward_step": 0.359375, "rewards/format_reward_step": 0.9921875, "step": 32 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 7.01953125, "calib/ece": 0.594716, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.956, "calib/gap": 0.0006919389082938876, "calib/mean_conf": 0.966716, "calib/mu_c": 0.9671505376344084, "calib/mu_w": 0.9664585987261145, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.594716, "calib/std_conf": 0.03542817161525556, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8711590909090909, "calib/step_q_c_n": 660.0, "calib/step_q_gap": 0.006741382026065557, "calib/step_q_w": 0.8644177088830254, "calib/step_q_w_n": 1137.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2602.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 441.78515625, "completions/mean_terminated_length": 443.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.0352, "grad_norm": 0.16815558075904846, "learning_rate": 4.666666666666667e-06, "loss": 0.038, "num_tokens": 7450078.0, "reward": 0.849609375, "reward_std": 0.2716839909553528, "rewards/accuracy_reward_step": 0.36328125, "rewards/format_reward_step": 0.97265625, "step": 33 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.50390625, "calib/ece": 0.5414941176470589, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9372549019607843, "calib/gap": 8.767951625077153e-05, "calib/mean_conf": 0.9650235294117647, "calib/mu_c": 0.965074074074074, "calib/mu_w": 0.9649863945578232, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5414941176470589, "calib/std_conf": 0.025525677869167495, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8557174556213019, "calib/step_q_c_n": 676.0, "calib/step_q_gap": 0.005662855014628487, "calib/step_q_w": 0.8500546006066734, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2688.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 379.390625, "completions/mean_terminated_length": 379.390625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.03626666666666667, "grad_norm": 0.2050214260816574, "learning_rate": 4.638888888888889e-06, "loss": 0.0256, "num_tokens": 7652314.0, "reward": 0.919921875, "reward_std": 0.3066937327384949, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.99609375, "step": 34 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.3359375, "calib/ece": 0.5662908366533868, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9442231075697212, "calib/gap": 0.00823278145695372, "calib/mean_conf": 0.9646972111553785, "calib/mu_c": 0.9696499999999999, "calib/mu_w": 0.9614172185430462, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5662908366533868, "calib/std_conf": 0.04091254826783785, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8770158959537572, "calib/step_q_c_n": 692.0, "calib/step_q_gap": 0.016350634570957978, "calib/step_q_w": 0.8606652613827992, "calib/step_q_w_n": 1186.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2759.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 460.00390625, "completions/mean_terminated_length": 461.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.037333333333333336, "grad_norm": 0.19532760977745056, "learning_rate": 4.611111111111112e-06, "loss": 0.0054, "num_tokens": 7879331.0, "reward": 0.884765625, "reward_std": 0.39114731550216675, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.98046875, "step": 35 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.765625, "calib/ece": 0.369897233201581, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9565217391304348, "calib/gap": -0.004632450331125804, "calib/mean_conf": 0.9667351778656127, "calib/mu_c": 0.9648675496688741, "calib/mu_w": 0.9694999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.369897233201581, "calib/std_conf": 0.02525124791789609, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8713951120162933, "calib/step_q_c_n": 982.0, "calib/step_q_gap": -0.010074221317040055, "calib/step_q_w": 0.8814693333333333, "calib/step_q_w_n": 750.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 447.75390625, "completions/mean_terminated_length": 447.75390625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.0384, "grad_norm": 0.18701352179050446, "learning_rate": 4.583333333333333e-06, "loss": 0.0439, "num_tokens": 8096668.0, "reward": 1.083984375, "reward_std": 0.2993289530277252, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.98828125, "step": 36 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 7.5390625, "calib/ece": 0.545100806451613, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9717741935483871, "calib/gap": -0.006544455544455374, "calib/mean_conf": 0.9684879032258066, "calib/mu_c": 0.9647142857142856, "calib/mu_w": 0.971258741258741, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.545100806451613, "calib/std_conf": 0.022842020886606838, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8704848046309698, "calib/step_q_c_n": 691.0, "calib/step_q_gap": -0.013976050897682502, "calib/step_q_w": 0.8844608555286523, "calib/step_q_w_n": 1239.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2587.0, "completions/max_terminated_length": 2587.0, "completions/mean_length": 471.2265625, "completions/mean_terminated_length": 476.8142395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.039466666666666664, "grad_norm": 0.1921999752521515, "learning_rate": 4.555555555555556e-06, "loss": -0.0012, "num_tokens": 8324398.0, "reward": 0.89453125, "reward_std": 0.276373028755188, "rewards/accuracy_reward_step": 0.41015625, "rewards/format_reward_step": 0.96875, "step": 37 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 7.30859375, "calib/ece": 0.5370607287449393, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.97165991902834, "calib/gap": -0.004610778044231156, "calib/mean_conf": 0.9743076923076923, "calib/mu_c": 0.9717129629629631, "calib/mu_w": 0.9763237410071942, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5370607287449393, "calib/std_conf": 0.020714232172895357, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8914092140921409, "calib/step_q_c_n": 738.0, "calib/step_q_gap": -0.009175075404770028, "calib/step_q_w": 0.9005842894969109, "calib/step_q_w_n": 1133.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2440.0, "completions/max_terminated_length": 2440.0, "completions/mean_length": 495.8125, "completions/mean_terminated_length": 499.7165222167969, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.04053333333333333, "grad_norm": 0.19463488459587097, "learning_rate": 4.527777777777778e-06, "loss": 0.0497, "num_tokens": 8558214.0, "reward": 0.904296875, "reward_std": 0.34605079889297485, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.96484375, "step": 38 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.4609375, "calib/ece": 0.532296442687747, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9841897233201581, "calib/gap": -0.0008047112462005179, "calib/mean_conf": 0.9749841897233201, "calib/mu_c": 0.9745357142857144, "calib/mu_w": 0.9753404255319149, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.532296442687747, "calib/std_conf": 0.020270496557005274, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8955542307692309, "calib/step_q_c_n": 780.0, "calib/step_q_gap": -0.006108601089176213, "calib/step_q_w": 0.9016628318584071, "calib/step_q_w_n": 1130.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2264.0, "completions/max_terminated_length": 2264.0, "completions/mean_length": 521.78125, "completions/mean_terminated_length": 521.78125, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 0.0416, "grad_norm": 0.1901213526725769, "learning_rate": 4.5e-06, "loss": -0.0001, "num_tokens": 8797878.0, "reward": 0.931640625, "reward_std": 0.25382548570632935, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.98828125, "step": 39 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 6.87109375, "calib/ece": 0.5636244897959185, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9918367346938776, "calib/gap": -0.0031860002742353233, "calib/mean_conf": 0.9758693877551021, "calib/mu_c": 0.9740098039215687, "calib/mu_w": 0.977195804195804, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5615836734693879, "calib/std_conf": 0.03475816277102392, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9029054913294798, "calib/step_q_c_n": 692.0, "calib/step_q_gap": 0.0015829046378210698, "calib/step_q_w": 0.9013225866916588, "calib/step_q_w_n": 1067.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2483.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 533.328125, "completions/mean_terminated_length": 533.328125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.042666666666666665, "grad_norm": 0.20792528986930847, "learning_rate": 4.472222222222223e-06, "loss": 0.0438, "num_tokens": 9041170.0, "reward": 0.873046875, "reward_std": 0.35490143299102783, "rewards/accuracy_reward_step": 0.3984375, "rewards/format_reward_step": 0.94921875, "step": 40 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 6.98828125, "calib/ece": 0.27429435483870956, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9959677419354839, "calib/gap": -0.003156104380242053, "calib/mean_conf": 0.9759072580645163, "calib/mu_c": 0.9749655172413794, "calib/mu_w": 0.9781216216216214, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27429435483870956, "calib/std_conf": 0.01858425310443043, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8983594827586207, "calib/step_q_c_n": 1160.0, "calib/step_q_gap": -0.009166749355846826, "calib/step_q_w": 0.9075262321144675, "calib/step_q_w_n": 629.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2332.0, "completions/max_terminated_length": 2332.0, "completions/mean_length": 488.15625, "completions/mean_terminated_length": 488.15625, "completions/min_length": 200.0, "completions/min_terminated_length": 200.0, "epoch": 0.04373333333333333, "grad_norm": 0.2168932557106018, "learning_rate": 4.444444444444444e-06, "loss": 0.0576, "num_tokens": 9273386.0, "reward": 1.1640625, "reward_std": 0.3478030860424042, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.96875, "step": 41 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.26171875, "calib/ece": 0.484244, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.992, "calib/gap": -0.0009284296779974444, "calib/mean_conf": 0.976244, "calib/mu_c": 0.9757723577235772, "calib/mu_w": 0.9767007874015746, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.484244, "calib/std_conf": 0.016944865416992853, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8981546894031669, "calib/step_q_c_n": 821.0, "calib/step_q_gap": -0.00843201579914532, "calib/step_q_w": 0.9065867052023122, "calib/step_q_w_n": 1038.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2621.0, "completions/max_terminated_length": 2621.0, "completions/mean_length": 448.59765625, "completions/mean_terminated_length": 452.1299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.0448, "grad_norm": 0.2360077202320099, "learning_rate": 4.416666666666667e-06, "loss": 0.0183, "num_tokens": 9492595.0, "reward": 0.96875, "reward_std": 0.3092150092124939, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.9765625, "step": 42 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 7.18359375, "calib/ece": 0.5074163265306122, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9918367346938776, "calib/gap": -0.0011056856187290354, "calib/mean_conf": 0.9768040816326531, "calib/mu_c": 0.9762173913043478, "calib/mu_w": 0.9773230769230768, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5074163265306122, "calib/std_conf": 0.01736364909432598, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8979627163781623, "calib/step_q_c_n": 751.0, "calib/step_q_gap": -0.013234893915955448, "calib/step_q_w": 0.9111976102941177, "calib/step_q_w_n": 1088.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2490.0, "completions/max_terminated_length": 2490.0, "completions/mean_length": 525.7578125, "completions/mean_terminated_length": 534.1032104492188, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.04586666666666667, "grad_norm": 0.2095789611339569, "learning_rate": 4.388888888888889e-06, "loss": 0.0273, "num_tokens": 9732413.0, "reward": 0.923828125, "reward_std": 0.3529389798641205, "rewards/accuracy_reward_step": 0.44921875, "rewards/format_reward_step": 0.94921875, "step": 43 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 7.16796875, "calib/ece": 0.603582995951417, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.9959514170040485, "calib/gap": -0.00047276916631766586, "calib/mean_conf": 0.9801012145748987, "calib/mu_c": 0.979806451612903, "calib/mu_w": 0.9802792207792207, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.603582995951417, "calib/std_conf": 0.015421010647003998, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9029935897435899, "calib/step_q_c_n": 624.0, "calib/step_q_gap": -0.0022454688856423655, "calib/step_q_w": 0.9052390586292323, "calib/step_q_w_n": 1211.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2350.0, "completions/max_terminated_length": 2350.0, "completions/mean_length": 543.63671875, "completions/mean_terminated_length": 545.7686767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.046933333333333334, "grad_norm": 0.19954414665699005, "learning_rate": 4.361111111111112e-06, "loss": 0.0314, "num_tokens": 9977904.0, "reward": 0.83984375, "reward_std": 0.2857242524623871, "rewards/accuracy_reward_step": 0.36328125, "rewards/format_reward_step": 0.953125, "step": 44 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.359375, "calib/ece": 0.5497738095238096, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9920634920634921, "calib/gap": 0.0020046296296295507, "calib/mean_conf": 0.9783452380952381, "calib/mu_c": 0.9794907407407407, "calib/mu_w": 0.9774861111111112, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5497738095238096, "calib/std_conf": 0.016030596277331763, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9021262002743485, "calib/step_q_c_n": 729.0, "calib/step_q_gap": -0.005504102755954587, "calib/step_q_w": 0.9076303030303031, "calib/step_q_w_n": 1155.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 503.96484375, "completions/mean_terminated_length": 505.9411926269531, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.048, "grad_norm": 0.21172115206718445, "learning_rate": 4.333333333333334e-06, "loss": 0.0142, "num_tokens": 10211967.0, "reward": 0.9140625, "reward_std": 0.3250906765460968, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.984375, "step": 45 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 7.8984375, "calib/ece": 0.507512396694215, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.987603305785124, "calib/gap": 0.002683405364615532, "calib/mean_conf": 0.9744545454545456, "calib/mu_c": 0.9758849557522125, "calib/mu_w": 0.9732015503875969, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.507512396694215, "calib/std_conf": 0.034341623541091014, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9051211734693876, "calib/step_q_c_n": 784.0, "calib/step_q_gap": -0.009437106013649554, "calib/step_q_w": 0.9145582794830371, "calib/step_q_w_n": 1238.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 591.01171875, "completions/mean_terminated_length": 593.3294677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.04906666666666667, "grad_norm": 0.1701515018939972, "learning_rate": 4.305555555555556e-06, "loss": 0.0433, "num_tokens": 10468034.0, "reward": 0.9140625, "reward_std": 0.2716418504714966, "rewards/accuracy_reward_step": 0.44140625, "rewards/format_reward_step": 0.9453125, "step": 46 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 8.078125, "calib/ece": 0.38388114754098346, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0013972135144549869, "calib/mean_conf": 0.9781434426229507, "calib/mu_c": 0.9787103448275862, "calib/mu_w": 0.9773131313131312, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.38388114754098346, "calib/std_conf": 0.014596482215464805, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9026735653809972, "calib/step_q_c_n": 1063.0, "calib/step_q_gap": -0.015431907255818667, "calib/step_q_w": 0.9181054726368159, "calib/step_q_w_n": 1005.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2569.0, "completions/max_terminated_length": 2569.0, "completions/mean_length": 592.296875, "completions/mean_terminated_length": 592.296875, "completions/min_length": 209.0, "completions/min_terminated_length": 209.0, "epoch": 0.050133333333333335, "grad_norm": 0.18556036055088043, "learning_rate": 4.277777777777778e-06, "loss": 0.0751, "num_tokens": 10725638.0, "reward": 1.044921875, "reward_std": 0.3660392761230469, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.94921875, "step": 47 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 7.3125, "calib/ece": 0.506847389558233, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9919678714859438, "calib/gap": -0.001242229992230004, "calib/mean_conf": 0.9767269076305222, "calib/mu_c": 0.976068376068376, "calib/mu_w": 0.977310606060606, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.506847389558233, "calib/std_conf": 0.017063480546151713, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.9031273644388399, "calib/step_q_c_n": 793.0, "calib/step_q_gap": -0.0056863519652379235, "calib/step_q_w": 0.9088137164040778, "calib/step_q_w_n": 1079.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2872.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 531.93359375, "completions/mean_terminated_length": 531.93359375, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.0512, "grad_norm": 0.2239966094493866, "learning_rate": 4.25e-06, "loss": 0.0354, "num_tokens": 10965501.0, "reward": 0.943359375, "reward_std": 0.3430543541908264, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.97265625, "step": 48 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.421875, "calib/ece": 0.4573067729083665, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9920318725099602, "calib/gap": 0.0014112523839797664, "calib/mean_conf": 0.9752350597609561, "calib/mu_c": 0.9759153846153847, "calib/mu_w": 0.974504132231405, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4573067729083665, "calib/std_conf": 0.018632227090474466, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9006391752577318, "calib/step_q_c_n": 970.0, "calib/step_q_gap": 0.004569282784613549, "calib/step_q_w": 0.8960698924731183, "calib/step_q_w_n": 930.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2752.0, "completions/max_terminated_length": 2752.0, "completions/mean_length": 535.91015625, "completions/mean_terminated_length": 535.91015625, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.05226666666666667, "grad_norm": 0.15744777023792267, "learning_rate": 4.222222222222223e-06, "loss": 0.0511, "num_tokens": 11207230.0, "reward": 0.998046875, "reward_std": 0.24411951005458832, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 0.98046875, "step": 49 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.3203125, "calib/ece": 0.44008000000000014, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.996, "calib/gap": 0.0008572713835869328, "calib/mean_conf": 0.9720800000000002, "calib/mu_c": 0.9724812030075186, "calib/mu_w": 0.9716239316239317, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.44008000000000014, "calib/std_conf": 0.01713690753899315, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8968477546549836, "calib/step_q_c_n": 913.0, "calib/step_q_gap": -0.0060774713105054, "calib/step_q_w": 0.902925225965489, "calib/step_q_w_n": 1217.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2766.0, "completions/max_terminated_length": 2766.0, "completions/mean_length": 562.51953125, "completions/mean_terminated_length": 569.1897583007812, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.05333333333333334, "grad_norm": 0.1873403787612915, "learning_rate": 4.194444444444445e-06, "loss": 0.0303, "num_tokens": 11456595.0, "reward": 1.0078125, "reward_std": 0.3558337688446045, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.9765625, "step": 50 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 7.66796875, "calib/ece": 0.41886746987951806, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9959839357429718, "calib/gap": -0.0027575401488444395, "calib/mean_conf": 0.9730843373493976, "calib/mu_c": 0.971855072463768, "calib/mu_w": 0.9746126126126125, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41886746987951806, "calib/std_conf": 0.015309029400566207, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8945123456790123, "calib/step_q_c_n": 972.0, "calib/step_q_gap": -0.005328219406759649, "calib/step_q_w": 0.8998405650857719, "calib/step_q_w_n": 991.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 591.3359375, "completions/mean_terminated_length": 600.7222290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.0544, "grad_norm": 0.16895967721939087, "learning_rate": 4.166666666666667e-06, "loss": 0.0259, "num_tokens": 11717273.0, "reward": 1.025390625, "reward_std": 0.27671748399734497, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.97265625, "step": 51 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.03125, "calib/ece": 0.3673769841269843, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9880952380952381, "calib/gap": -0.0009092105263159844, "calib/mean_conf": 0.9705515873015874, "calib/mu_c": 0.9701907894736842, "calib/mu_w": 0.9711000000000002, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3673769841269843, "calib/std_conf": 0.017846827267394613, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8935311890838206, "calib/step_q_c_n": 1026.0, "calib/step_q_gap": -0.0045415930221225365, "calib/step_q_w": 0.8980727821059431, "calib/step_q_w_n": 774.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2482.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 581.5078125, "completions/mean_terminated_length": 583.7882690429688, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.055466666666666664, "grad_norm": 0.1962101310491562, "learning_rate": 4.138888888888889e-06, "loss": 0.0641, "num_tokens": 11974091.0, "reward": 1.08203125, "reward_std": 0.3331637382507324, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.9765625, "step": 52 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.30859375, "calib/ece": 0.39697333333333334, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.992, "calib/gap": 0.005176350129621143, "calib/mean_conf": 0.9689733333333334, "calib/mu_c": 0.9711888111888112, "calib/mu_w": 0.96601246105919, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.39697333333333334, "calib/std_conf": 0.043638557874124725, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8942307692307693, "calib/step_q_c_n": 988.0, "calib/step_q_gap": -0.0021237041554141856, "calib/step_q_w": 0.8963544733861835, "calib/step_q_w_n": 883.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2854.0, "completions/max_terminated_length": 2854.0, "completions/mean_length": 591.51953125, "completions/mean_terminated_length": 596.1771850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.05653333333333333, "grad_norm": 0.1859111487865448, "learning_rate": 4.111111111111111e-06, "loss": 0.0109, "num_tokens": 12231344.0, "reward": 1.04296875, "reward_std": 0.34486693143844604, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.96875, "step": 53 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.765625, "calib/ece": 0.33599206349206356, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9880952380952381, "calib/gap": -0.006206127003449025, "calib/mean_conf": 0.9669444444444445, "calib/mu_c": 0.9646540880503144, "calib/mu_w": 0.9708602150537634, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33599206349206356, "calib/std_conf": 0.01889268169204031, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.890474060822898, "calib/step_q_c_n": 1118.0, "calib/step_q_gap": -0.012905249521929507, "calib/step_q_w": 0.9033793103448275, "calib/step_q_w_n": 870.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 556.73046875, "completions/mean_terminated_length": 558.9137573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.0576, "grad_norm": 0.18028347194194794, "learning_rate": 4.083333333333334e-06, "loss": 0.0789, "num_tokens": 12480099.0, "reward": 1.109375, "reward_std": 0.26777738332748413, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.9765625, "step": 54 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 7.46875, "calib/ece": 0.5246586345381526, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9839357429718876, "calib/gap": 0.002404325032765553, "calib/mean_conf": 0.9624096385542168, "calib/mu_c": 0.9637614678899082, "calib/mu_w": 0.9613571428571427, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5246586345381526, "calib/std_conf": 0.021505039744383402, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8906507304116865, "calib/step_q_c_n": 753.0, "calib/step_q_gap": -0.005639174678908732, "calib/step_q_w": 0.8962899050905953, "calib/step_q_w_n": 1159.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2682.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 562.30078125, "completions/mean_terminated_length": 568.9683837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.058666666666666666, "grad_norm": 0.182333305478096, "learning_rate": 4.055555555555556e-06, "loss": 0.0163, "num_tokens": 12731872.0, "reward": 0.912109375, "reward_std": 0.27818602323532104, "rewards/accuracy_reward_step": 0.42578125, "rewards/format_reward_step": 0.97265625, "step": 55 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 8.05078125, "calib/ece": 0.5173673469387754, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9959183673469387, "calib/gap": -0.005001683501683396, "calib/mean_conf": 0.9663469387755101, "calib/mu_c": 0.9635909090909092, "calib/mu_w": 0.9685925925925926, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5173673469387754, "calib/std_conf": 0.018311980042526377, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.894064039408867, "calib/step_q_c_n": 812.0, "calib/step_q_gap": -0.0012281943781625726, "calib/step_q_w": 0.8952922337870296, "calib/step_q_w_n": 1249.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2483.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 634.90234375, "completions/mean_terminated_length": 637.3922119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.05973333333333333, "grad_norm": 0.15238536894321442, "learning_rate": 4.027777777777779e-06, "loss": 0.0984, "num_tokens": 13001247.0, "reward": 0.904296875, "reward_std": 0.2974989414215088, "rewards/accuracy_reward_step": 0.4296875, "rewards/format_reward_step": 0.94921875, "step": 56 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 8.68359375, "calib/ece": 0.3995247933884297, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.9917355371900827, "calib/gap": 0.003234271810914402, "calib/mean_conf": 0.9656404958677686, "calib/mu_c": 0.9670437956204381, "calib/mu_w": 0.9638095238095237, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3995247933884297, "calib/std_conf": 0.022542496428050295, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8948071359691417, "calib/step_q_c_n": 1037.0, "calib/step_q_gap": -0.018253572293927434, "calib/step_q_w": 0.9130607082630692, "calib/step_q_w_n": 1186.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2637.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 637.07421875, "completions/mean_terminated_length": 649.7649536132812, "completions/min_length": 0.0, "completions/min_terminated_length": 305.0, "epoch": 0.0608, "grad_norm": 0.16103595495224, "learning_rate": 4.000000000000001e-06, "loss": 0.0348, "num_tokens": 13271130.0, "reward": 1.0078125, "reward_std": 0.33539533615112305, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.9453125, "step": 57 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 7.85546875, "calib/ece": 0.45600000000000007, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.9877551020408163, "calib/gap": 0.0002366666666666628, "calib/mean_conf": 0.9662040816326531, "calib/mu_c": 0.9663200000000001, "calib/mu_w": 0.9660833333333334, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.45600000000000007, "calib/std_conf": 0.019812364302592516, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8964467005076142, "calib/step_q_c_n": 985.0, "calib/step_q_gap": 0.006100696608978828, "calib/step_q_w": 0.8903460038986354, "calib/step_q_w_n": 1026.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2510.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 682.671875, "completions/mean_terminated_length": 693.5079956054688, "completions/min_length": 0.0, "completions/min_terminated_length": 281.0, "epoch": 0.06186666666666667, "grad_norm": 0.1501203328371048, "learning_rate": 3.972222222222223e-06, "loss": 0.0261, "num_tokens": 13552214.0, "reward": 0.96484375, "reward_std": 0.35837113857269287, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.953125, "step": 58 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 7.6328125, "calib/ece": 0.4594758064516129, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9959677419354839, "calib/gap": -0.00013723577235780215, "calib/mean_conf": 0.963508064516129, "calib/mu_c": 0.9634400000000001, "calib/mu_w": 0.9635772357723579, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4594758064516129, "calib/std_conf": 0.019842929986457408, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.895260180995475, "calib/step_q_c_n": 884.0, "calib/step_q_gap": -0.007169725546581129, "calib/step_q_w": 0.9024299065420561, "calib/step_q_w_n": 1070.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2862.0, "completions/max_terminated_length": 2862.0, "completions/mean_length": 660.53515625, "completions/mean_terminated_length": 663.1255493164062, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.06293333333333333, "grad_norm": 0.1628417819738388, "learning_rate": 3.944444444444445e-06, "loss": 0.0698, "num_tokens": 13827559.0, "reward": 0.970703125, "reward_std": 0.313901424407959, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.96484375, "step": 59 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 7.6640625, "calib/ece": 0.5115447154471544, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00614472281449896, "calib/mean_conf": 0.9668292682926828, "calib/mu_c": 0.9634821428571428, "calib/mu_w": 0.9696268656716418, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5115447154471544, "calib/std_conf": 0.01945673011117428, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.894878640776699, "calib/step_q_c_n": 824.0, "calib/step_q_gap": -0.005226807377958442, "calib/step_q_w": 0.9001054481546574, "calib/step_q_w_n": 1138.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 682.29296875, "completions/mean_terminated_length": 687.6653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 287.0, "epoch": 0.064, "grad_norm": 0.16230860352516174, "learning_rate": 3.916666666666667e-06, "loss": 0.0715, "num_tokens": 14111082.0, "reward": 0.91796875, "reward_std": 0.332300066947937, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.9609375, "step": 60 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 8.92578125, "calib/ece": 0.3466803278688526, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.9959016393442623, "calib/gap": 0.001121555223243087, "calib/mean_conf": 0.965532786885246, "calib/mu_c": 0.9659602649006623, "calib/mu_w": 0.9648387096774192, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3466803278688526, "calib/std_conf": 0.019900442546454743, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8966260869565217, "calib/step_q_c_n": 1150.0, "calib/step_q_gap": -0.001743957096341786, "calib/step_q_w": 0.8983700440528635, "calib/step_q_w_n": 1135.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 639.625, "completions/mean_terminated_length": 642.1333618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.06506666666666666, "grad_norm": 0.1348682940006256, "learning_rate": 3.88888888888889e-06, "loss": 0.1511, "num_tokens": 14378890.0, "reward": 1.06640625, "reward_std": 0.28018099069595337, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.953125, "step": 61 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 8.50390625, "calib/ece": 0.4697095435684647, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00022933884297526852, "calib/mean_conf": 0.9676348547717842, "calib/mu_c": 0.9677500000000001, "calib/mu_w": 0.9675206611570248, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4697095435684647, "calib/std_conf": 0.017326719732256125, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8983193779904305, "calib/step_q_c_n": 836.0, "calib/step_q_gap": 0.006620645701094108, "calib/step_q_w": 0.8916987322893364, "calib/step_q_w_n": 1341.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 709.62109375, "completions/mean_terminated_length": 720.8849487304688, "completions/min_length": 0.0, "completions/min_terminated_length": 248.0, "epoch": 0.06613333333333334, "grad_norm": 0.14342306554317474, "learning_rate": 3.861111111111112e-06, "loss": 0.0776, "num_tokens": 14667633.0, "reward": 0.9375, "reward_std": 0.33649760484695435, "rewards/accuracy_reward_step": 0.46875, "rewards/format_reward_step": 0.9375, "step": 62 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.171875, "calib/ece": 0.4264940239043825, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9920318725099602, "calib/gap": -0.009075989782886151, "calib/mean_conf": 0.9643426294820717, "calib/mu_c": 0.9601481481481482, "calib/mu_w": 0.9692241379310343, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4264940239043825, "calib/std_conf": 0.019876765136435336, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8941853360488797, "calib/step_q_c_n": 982.0, "calib/step_q_gap": -0.002231525777818155, "calib/step_q_w": 0.8964168618266979, "calib/step_q_w_n": 854.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2880.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 699.48828125, "completions/mean_terminated_length": 702.2313842773438, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.0672, "grad_norm": 0.1324063539505005, "learning_rate": 3.833333333333334e-06, "loss": 0.0506, "num_tokens": 14955342.0, "reward": 1.015625, "reward_std": 0.2985878586769104, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.9765625, "step": 63 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 7.59375, "calib/ece": 0.3233919999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0009152777777776677, "calib/mean_conf": 0.9633919999999999, "calib/mu_c": 0.9630625, "calib/mu_w": 0.9639777777777777, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3233919999999999, "calib/std_conf": 0.017738949687058702, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8929639401934916, "calib/step_q_c_n": 1137.0, "calib/step_q_gap": -0.005779554230300299, "calib/step_q_w": 0.8987434944237919, "calib/step_q_w_n": 807.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2398.0, "completions/max_terminated_length": 2398.0, "completions/mean_length": 655.84765625, "completions/mean_terminated_length": 661.0117797851562, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.06826666666666667, "grad_norm": 0.1513136774301529, "learning_rate": 3.8055555555555556e-06, "loss": 0.0724, "num_tokens": 15227015.0, "reward": 1.11328125, "reward_std": 0.3160572648048401, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.9765625, "step": 64 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.1171875, "calib/ece": 0.3814624505928854, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9920948616600791, "calib/gap": 0.0010383776151970947, "calib/mean_conf": 0.9624901185770751, "calib/mu_c": 0.9629251700680272, "calib/mu_w": 0.9618867924528302, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3814624505928854, "calib/std_conf": 0.020287579223183028, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.895662188099808, "calib/step_q_c_n": 1042.0, "calib/step_q_gap": -0.0013249913873714103, "calib/step_q_w": 0.8969871794871794, "calib/step_q_w_n": 780.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2471.0, "completions/max_terminated_length": 2471.0, "completions/mean_length": 601.99609375, "completions/mean_terminated_length": 601.99609375, "completions/min_length": 255.0, "completions/min_terminated_length": 255.0, "epoch": 0.06933333333333333, "grad_norm": 0.13157853484153748, "learning_rate": 3.777777777777778e-06, "loss": 0.0275, "num_tokens": 15486150.0, "reward": 1.064453125, "reward_std": 0.22381740808486938, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.98046875, "step": 65 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 7.77734375, "calib/ece": 0.4534710743801653, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.9958677685950413, "calib/gap": -0.0015453986472636627, "calib/mean_conf": 0.9617355371900826, "calib/mu_c": 0.9609756097560977, "calib/mu_w": 0.9625210084033614, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4534710743801653, "calib/std_conf": 0.01922797918110774, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8947882736156351, "calib/step_q_c_n": 921.0, "calib/step_q_gap": -0.0035201375993182404, "calib/step_q_w": 0.8983084112149533, "calib/step_q_w_n": 1070.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 745.53125, "completions/mean_terminated_length": 748.4549560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.0704, "grad_norm": 0.13143111765384674, "learning_rate": 3.7500000000000005e-06, "loss": 0.0906, "num_tokens": 15783358.0, "reward": 0.953125, "reward_std": 0.28545472025871277, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.9453125, "step": 66 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.4296875, "calib/ece": 0.38402390438247, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0017703318152244574, "calib/mean_conf": 0.9617131474103585, "calib/mu_c": 0.9609655172413791, "calib/mu_w": 0.9627358490566036, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38402390438247, "calib/std_conf": 0.018289760117441916, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.897305447470817, "calib/step_q_c_n": 1028.0, "calib/step_q_gap": 0.0025800470131510655, "calib/step_q_w": 0.8947254004576659, "calib/step_q_w_n": 874.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2388.0, "completions/max_terminated_length": 2388.0, "completions/mean_length": 692.1015625, "completions/mean_terminated_length": 694.8157348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 253.0, "epoch": 0.07146666666666666, "grad_norm": 0.1354527324438095, "learning_rate": 3.7222222222222225e-06, "loss": 0.0546, "num_tokens": 16065544.0, "reward": 1.056640625, "reward_std": 0.22899171710014343, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.98046875, "step": 67 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 7.85546875, "calib/ece": 0.41004048582995933, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9635627530364372, "calib/gap": -0.0029550264550266547, "calib/mean_conf": 0.9565991902834007, "calib/mu_c": 0.9552592592592591, "calib/mu_w": 0.9582142857142858, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41004048582995933, "calib/std_conf": 0.021665783004325333, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.892875, "calib/step_q_c_n": 960.0, "calib/step_q_gap": -0.0020060656517602027, "calib/step_q_w": 0.8948810656517602, "calib/step_q_w_n": 1051.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 662.22265625, "completions/mean_terminated_length": 664.8196411132812, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.07253333333333334, "grad_norm": 0.14325231313705444, "learning_rate": 3.694444444444445e-06, "loss": 0.0253, "num_tokens": 16339161.0, "reward": 1.013671875, "reward_std": 0.22165964543819427, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.96484375, "step": 68 }, { "calib/answer_extract_rate": 0.92578125, "calib/avg_num_step_conf": 7.5625, "calib/ece": 0.4172573839662447, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.9831223628691983, "calib/gap": -0.0003354357798164287, "calib/mean_conf": 0.9573417721518986, "calib/mu_c": 0.9571875000000001, "calib/mu_w": 0.9575229357798165, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4172573839662447, "calib/std_conf": 0.020869831299338597, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8965642151481887, "calib/step_q_c_n": 911.0, "calib/step_q_gap": 0.0017203127091643955, "calib/step_q_w": 0.8948439024390243, "calib/step_q_w_n": 1025.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2957.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 777.046875, "completions/mean_terminated_length": 789.3809814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 290.0, "epoch": 0.0736, "grad_norm": 0.13327306509017944, "learning_rate": 3.6666666666666666e-06, "loss": 0.0825, "num_tokens": 16642581.0, "reward": 0.962890625, "reward_std": 0.2655287981033325, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.92578125, "step": 69 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 7.8203125, "calib/ece": 0.47163934426229515, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.9672131147540983, "calib/gap": -0.008681732580037904, "calib/mean_conf": 0.9552459016393443, "calib/mu_c": 0.9507627118644066, "calib/mu_w": 0.9594444444444445, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.47163934426229515, "calib/std_conf": 0.019823529435219485, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8922645739910313, "calib/step_q_c_n": 892.0, "calib/step_q_gap": 0.0011564658829230634, "calib/step_q_w": 0.8911081081081083, "calib/step_q_w_n": 1110.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 734.734375, "completions/mean_terminated_length": 743.4466552734375, "completions/min_length": 0.0, "completions/min_terminated_length": 234.0, "epoch": 0.07466666666666667, "grad_norm": 0.13847662508487701, "learning_rate": 3.638888888888889e-06, "loss": 0.0989, "num_tokens": 16937665.0, "reward": 0.9375, "reward_std": 0.29290473461151123, "rewards/accuracy_reward_step": 0.4609375, "rewards/format_reward_step": 0.953125, "step": 70 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.8984375, "calib/ece": 0.4378137651821862, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.97165991902834, "calib/gap": -0.001003937007874267, "calib/mean_conf": 0.9519838056680161, "calib/mu_c": 0.9514960629921257, "calib/mu_w": 0.9525, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4378137651821862, "calib/std_conf": 0.018846056001520246, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8933234421364985, "calib/step_q_c_n": 1011.0, "calib/step_q_gap": 0.003220837558755796, "calib/step_q_w": 0.8901026045777427, "calib/step_q_w_n": 1267.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3036.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 706.60546875, "completions/mean_terminated_length": 712.1693115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 276.0, "epoch": 0.07573333333333333, "grad_norm": 0.14184655249118805, "learning_rate": 3.6111111111111115e-06, "loss": 0.044, "num_tokens": 17222964.0, "reward": 0.978515625, "reward_std": 0.3507859408855438, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.96484375, "step": 71 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.8046875, "calib/ece": 0.4324999999999999, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9761904761904762, "calib/gap": -0.006306857611506955, "calib/mean_conf": 0.9523412698412698, "calib/mu_c": 0.9493129770992368, "calib/mu_w": 0.9556198347107437, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4324999999999999, "calib/std_conf": 0.020248324523440216, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8914902363823227, "calib/step_q_c_n": 973.0, "calib/step_q_gap": 0.0035487729676886293, "calib/step_q_w": 0.8879414634146341, "calib/step_q_w_n": 1025.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2827.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 668.3984375, "completions/mean_terminated_length": 671.0196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 303.0, "epoch": 0.0768, "grad_norm": 0.15166209638118744, "learning_rate": 3.5833333333333335e-06, "loss": 0.0593, "num_tokens": 17498482.0, "reward": 1.001953125, "reward_std": 0.29228222370147705, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.98046875, "step": 72 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.65234375, "calib/ece": 0.3369169960474308, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9762845849802372, "calib/gap": 0.00045622119815658113, "calib/mean_conf": 0.9495652173913044, "calib/mu_c": 0.9497419354838709, "calib/mu_w": 0.9492857142857143, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3369169960474308, "calib/std_conf": 0.02027987708730161, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8940547476475621, "calib/step_q_c_n": 1169.0, "calib/step_q_gap": 0.00519398815389116, "calib/step_q_w": 0.8888607594936709, "calib/step_q_w_n": 790.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1846.0, "completions/max_terminated_length": 1846.0, "completions/mean_length": 652.015625, "completions/mean_terminated_length": 652.015625, "completions/min_length": 280.0, "completions/min_terminated_length": 280.0, "epoch": 0.07786666666666667, "grad_norm": 0.1515604704618454, "learning_rate": 3.555555555555556e-06, "loss": 0.0284, "num_tokens": 17772430.0, "reward": 1.095703125, "reward_std": 0.30325573682785034, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.98046875, "step": 73 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 7.64453125, "calib/ece": 0.43337499999999995, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.9416666666666667, "calib/gap": -0.003423248053392558, "calib/mean_conf": 0.9500416666666667, "calib/mu_c": 0.9483870967741936, "calib/mu_w": 0.9518103448275862, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.43337499999999995, "calib/std_conf": 0.02096620130644132, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8894485683987274, "calib/step_q_c_n": 943.0, "calib/step_q_gap": 0.0018252942369917369, "calib/step_q_w": 0.8876232741617357, "calib/step_q_w_n": 1014.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2468.0, "completions/max_terminated_length": 2468.0, "completions/mean_length": 676.2421875, "completions/mean_terminated_length": 686.9762573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.07893333333333333, "grad_norm": 0.1617853343486786, "learning_rate": 3.5277777777777784e-06, "loss": -0.0034, "num_tokens": 18049476.0, "reward": 0.953125, "reward_std": 0.31296655535697937, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.9375, "step": 74 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 8.03515625, "calib/ece": 0.28699604743083007, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9565217391304348, "calib/gap": -0.005362044817927347, "calib/mean_conf": 0.9510276679841898, "calib/mu_c": 0.9492261904761905, "calib/mu_w": 0.9545882352941178, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28699604743083007, "calib/std_conf": 0.020052579730554358, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8908532934131735, "calib/step_q_c_n": 1336.0, "calib/step_q_gap": 0.0002985083923691123, "calib/step_q_w": 0.8905547850208044, "calib/step_q_w_n": 721.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 631.2265625, "completions/mean_terminated_length": 631.2265625, "completions/min_length": 213.0, "completions/min_terminated_length": 213.0, "epoch": 0.08, "grad_norm": 0.16196872293949127, "learning_rate": 3.5e-06, "loss": 0.0076, "num_tokens": 18315822.0, "reward": 1.150390625, "reward_std": 0.298104852437973, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.98828125, "step": 75 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 7.78125, "calib/ece": 0.35212, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.948, "calib/gap": -0.0011333333333335416, "calib/mean_conf": 0.95212, "calib/mu_c": 0.9516666666666664, "calib/mu_w": 0.9528, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35212, "calib/std_conf": 0.02106906737375909, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.895199291408326, "calib/step_q_c_n": 1129.0, "calib/step_q_gap": 0.007169163945985324, "calib/step_q_w": 0.8880301274623407, "calib/step_q_w_n": 863.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2138.0, "completions/max_terminated_length": 2138.0, "completions/mean_length": 682.734375, "completions/mean_terminated_length": 693.5714721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 297.0, "epoch": 0.08106666666666666, "grad_norm": 0.13606837391853333, "learning_rate": 3.4722222222222224e-06, "loss": -0.0013, "num_tokens": 18593658.0, "reward": 1.07421875, "reward_std": 0.26280534267425537, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.9765625, "step": 76 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 8.21484375, "calib/ece": 0.3220576131687243, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.934156378600823, "calib/gap": -0.0020849673202613106, "calib/mean_conf": 0.9516872427983539, "calib/mu_c": 0.9509150326797386, "calib/mu_w": 0.953, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3220576131687243, "calib/std_conf": 0.021373441314687876, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8877159468438538, "calib/step_q_c_n": 1204.0, "calib/step_q_gap": 0.006236525264320947, "calib/step_q_w": 0.8814794215795329, "calib/step_q_w_n": 899.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2677.0, "completions/max_terminated_length": 2677.0, "completions/mean_length": 689.83203125, "completions/mean_terminated_length": 698.0119018554688, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.08213333333333334, "grad_norm": 0.15443819761276245, "learning_rate": 3.444444444444445e-06, "loss": 0.0233, "num_tokens": 18874919.0, "reward": 1.068359375, "reward_std": 0.29310113191604614, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.94140625, "step": 77 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 8.1640625, "calib/ece": 0.361195219123506, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9123505976095617, "calib/gap": -0.004065861978483354, "calib/mean_conf": 0.9496414342629482, "calib/mu_c": 0.947972972972973, "calib/mu_w": 0.9520388349514564, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.360597609561753, "calib/std_conf": 0.023231703942294266, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8854640980735552, "calib/step_q_c_n": 1142.0, "calib/step_q_gap": -0.00010552217960946297, "calib/step_q_w": 0.8855696202531647, "calib/step_q_w_n": 948.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3060.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 711.49609375, "completions/mean_terminated_length": 714.2863159179688, "completions/min_length": 0.0, "completions/min_terminated_length": 292.0, "epoch": 0.0832, "grad_norm": 0.14217936992645264, "learning_rate": 3.416666666666667e-06, "loss": 0.0421, "num_tokens": 19165086.0, "reward": 1.0625, "reward_std": 0.28406330943107605, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.96875, "step": 78 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 8.00390625, "calib/ece": 0.30577689243027895, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9561752988047809, "calib/gap": 0.003775794757389761, "calib/mean_conf": 0.9551792828685259, "calib/mu_c": 0.9565030674846625, "calib/mu_w": 0.9527272727272728, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.30577689243027895, "calib/std_conf": 0.0209216840057658, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8896686746987952, "calib/step_q_c_n": 1328.0, "calib/step_q_gap": -0.0016073308490550664, "calib/step_q_w": 0.8912760055478502, "calib/step_q_w_n": 721.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2850.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 692.79296875, "completions/mean_terminated_length": 698.248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 358.0, "epoch": 0.08426666666666667, "grad_norm": 0.14957371354103088, "learning_rate": 3.3888888888888893e-06, "loss": 0.0032, "num_tokens": 19448817.0, "reward": 1.123046875, "reward_std": 0.2835806608200073, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.97265625, "step": 79 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.76171875, "calib/ece": 0.32219607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": -0.00024514338575398753, "calib/mean_conf": 0.9535686274509803, "calib/mu_c": 0.9534782608695651, "calib/mu_w": 0.9537234042553191, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32219607843137255, "calib/std_conf": 0.02371832658016091, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8882774049217003, "calib/step_q_c_n": 1341.0, "calib/step_q_gap": -0.006212617251248664, "calib/step_q_w": 0.8944900221729489, "calib/step_q_w_n": 902.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1933.0, "completions/max_terminated_length": 1933.0, "completions/mean_length": 629.71484375, "completions/mean_terminated_length": 632.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 318.0, "epoch": 0.08533333333333333, "grad_norm": 0.17289277911186218, "learning_rate": 3.3611111111111117e-06, "loss": 0.0163, "num_tokens": 19712184.0, "reward": 1.125, "reward_std": 0.3098638653755188, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.9921875, "step": 80 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.24609375, "calib/ece": 0.32431451612903217, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9556451612903226, "calib/gap": -0.0031465997770345755, "calib/mean_conf": 0.9533467741935483, "calib/mu_c": 0.9521794871794873, "calib/mu_w": 0.9553260869565219, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32431451612903217, "calib/std_conf": 0.01985046694617709, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8929888712241655, "calib/step_q_c_n": 1258.0, "calib/step_q_gap": 0.003538916309828166, "calib/step_q_w": 0.8894499549143373, "calib/step_q_w_n": 1109.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2736.0, "completions/max_terminated_length": 2736.0, "completions/mean_length": 659.609375, "completions/mean_terminated_length": 675.4400024414062, "completions/min_length": 0.0, "completions/min_terminated_length": 272.0, "epoch": 0.0864, "grad_norm": 0.15319500863552094, "learning_rate": 3.3333333333333333e-06, "loss": -0.0001, "num_tokens": 19987292.0, "reward": 1.091796875, "reward_std": 0.2939770221710205, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.96484375, "step": 81 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 8.80859375, "calib/ece": 0.3080158730158729, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9682539682539683, "calib/gap": 0.003663059212793973, "calib/mean_conf": 0.9548412698412697, "calib/mu_c": 0.9561349693251535, "calib/mu_w": 0.9524719101123595, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3080158730158729, "calib/std_conf": 0.024192814326865104, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8965398420674804, "calib/step_q_c_n": 1393.0, "calib/step_q_gap": 0.011180213297178665, "calib/step_q_w": 0.8853596287703017, "calib/step_q_w_n": 862.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 653.3515625, "completions/mean_terminated_length": 655.9137573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 283.0, "epoch": 0.08746666666666666, "grad_norm": 0.16056913137435913, "learning_rate": 3.3055555555555558e-06, "loss": 0.0332, "num_tokens": 20260102.0, "reward": 1.125, "reward_std": 0.28356438875198364, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.9765625, "step": 82 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 9.03515625, "calib/ece": 0.4024899598393574, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9558232931726908, "calib/gap": -0.006429690560125434, "calib/mean_conf": 0.956706827309237, "calib/mu_c": 0.9538405797101449, "calib/mu_w": 0.9602702702702703, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4024899598393574, "calib/std_conf": 0.02036805893731143, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8947774979009234, "calib/step_q_c_n": 1191.0, "calib/step_q_gap": 0.005731152089871738, "calib/step_q_w": 0.8890463458110517, "calib/step_q_w_n": 1122.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 754.16796875, "completions/mean_terminated_length": 760.1063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 326.0, "epoch": 0.08853333333333334, "grad_norm": 0.13364803791046143, "learning_rate": 3.277777777777778e-06, "loss": 0.0606, "num_tokens": 20560433.0, "reward": 1.025390625, "reward_std": 0.22642236948013306, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.97265625, "step": 83 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 8.7109375, "calib/ece": 0.4169565217391304, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9604743083003953, "calib/gap": -4.4620412267382115e-05, "calib/mean_conf": 0.954505928853755, "calib/mu_c": 0.954485294117647, "calib/mu_w": 0.9545299145299144, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4169565217391304, "calib/std_conf": 0.02147369390827028, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.89727868852459, "calib/step_q_c_n": 1220.0, "calib/step_q_gap": 0.0010806687226097011, "calib/step_q_w": 0.8961980198019803, "calib/step_q_w_n": 1010.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 652.03515625, "completions/mean_terminated_length": 652.03515625, "completions/min_length": 215.0, "completions/min_terminated_length": 215.0, "epoch": 0.0896, "grad_norm": 0.19131937623023987, "learning_rate": 3.2500000000000002e-06, "loss": 0.0183, "num_tokens": 20833274.0, "reward": 1.025390625, "reward_std": 0.23556412756443024, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.98828125, "step": 84 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 9.48828125, "calib/ece": 0.3969709543568466, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.991701244813278, "calib/gap": 0.0021106442577029094, "calib/mean_conf": 0.961286307053942, "calib/mu_c": 0.962205882352941, "calib/mu_w": 0.9600952380952381, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3969709543568466, "calib/std_conf": 0.016155525860297164, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9003043110735418, "calib/step_q_c_n": 1183.0, "calib/step_q_gap": 0.002639784588790439, "calib/step_q_w": 0.8976645264847514, "calib/step_q_w_n": 1246.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 733.83984375, "completions/mean_terminated_length": 751.4520263671875, "completions/min_length": 0.0, "completions/min_terminated_length": 349.0, "epoch": 0.09066666666666667, "grad_norm": 0.1620008498430252, "learning_rate": 3.2222222222222227e-06, "loss": 0.0209, "num_tokens": 21128961.0, "reward": 1.0, "reward_std": 0.29893848299980164, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.9375, "step": 85 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.078125, "calib/ece": 0.4341666666666667, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.996031746031746, "calib/gap": 0.0021945357955981404, "calib/mean_conf": 0.9659126984126984, "calib/mu_c": 0.9669402985074627, "calib/mu_w": 0.9647457627118645, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4341666666666667, "calib/std_conf": 0.014892482305730816, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9049746621621619, "calib/step_q_c_n": 1184.0, "calib/step_q_gap": 0.0014571183025128276, "calib/step_q_w": 0.9035175438596491, "calib/step_q_w_n": 1140.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2919.0, "completions/max_terminated_length": 2919.0, "completions/mean_length": 697.66796875, "completions/mean_terminated_length": 703.1614379882812, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.09173333333333333, "grad_norm": 0.16130724549293518, "learning_rate": 3.1944444444444443e-06, "loss": 0.0167, "num_tokens": 21413076.0, "reward": 1.015625, "reward_std": 0.2880152463912964, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.984375, "step": 86 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 10.81640625, "calib/ece": 0.28087136929460577, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.995850622406639, "calib/gap": -0.0016433734939759193, "calib/mean_conf": 0.9696680497925311, "calib/mu_c": 0.9691566265060241, "calib/mu_w": 0.9708, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28087136929460577, "calib/std_conf": 0.014371110367996593, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9044444444444444, "calib/step_q_c_n": 1530.0, "calib/step_q_gap": -0.007928436911487924, "calib/step_q_w": 0.9123728813559323, "calib/step_q_w_n": 1239.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2897.0, "completions/max_terminated_length": 2897.0, "completions/mean_length": 626.60546875, "completions/mean_terminated_length": 657.422119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 230.0, "epoch": 0.0928, "grad_norm": 0.18359941244125366, "learning_rate": 3.1666666666666667e-06, "loss": -0.0573, "num_tokens": 21678983.0, "reward": 1.119140625, "reward_std": 0.32765060663223267, "rewards/accuracy_reward_step": 0.6484375, "rewards/format_reward_step": 0.94140625, "step": 87 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.68359375, "calib/ece": 0.35642857142857143, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0031220485533753894, "calib/mean_conf": 0.9715079365079365, "calib/mu_c": 0.9727096774193548, "calib/mu_w": 0.9695876288659794, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35642857142857143, "calib/std_conf": 0.010470477037499024, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9039452843772497, "calib/step_q_c_n": 1389.0, "calib/step_q_gap": -0.002485908283300753, "calib/step_q_w": 0.9064311926605505, "calib/step_q_w_n": 1090.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 726.24609375, "completions/mean_terminated_length": 734.8577270507812, "completions/min_length": 0.0, "completions/min_terminated_length": 367.0, "epoch": 0.09386666666666667, "grad_norm": 0.14980705082416534, "learning_rate": 3.138888888888889e-06, "loss": 0.0058, "num_tokens": 21974750.0, "reward": 1.095703125, "reward_std": 0.23912689089775085, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.98046875, "step": 88 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 9.45703125, "calib/ece": 0.4387, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00040530108080294447, "calib/mean_conf": 0.9747, "calib/mu_c": 0.9748880597014925, "calib/mu_w": 0.9744827586206896, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4387, "calib/std_conf": 0.011764777940955802, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9084339774557166, "calib/step_q_c_n": 1242.0, "calib/step_q_gap": -0.0014726439364733945, "calib/step_q_w": 0.90990662139219, "calib/step_q_w_n": 1178.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 757.140625, "completions/mean_terminated_length": 763.1023559570312, "completions/min_length": 0.0, "completions/min_terminated_length": 370.0, "epoch": 0.09493333333333333, "grad_norm": 0.14294065535068512, "learning_rate": 3.1111111111111116e-06, "loss": -0.0063, "num_tokens": 22277466.0, "reward": 1.0078125, "reward_std": 0.24918082356452942, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.96875, "step": 89 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 11.67578125, "calib/ece": 0.3420647773279353, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9959514170040485, "calib/gap": -0.0025824175824175066, "calib/mean_conf": 0.9736437246963563, "calib/mu_c": 0.9726923076923077, "calib/mu_w": 0.9752747252747253, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3420647773279353, "calib/std_conf": 0.0131513530920179, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9066065879428217, "calib/step_q_c_n": 1609.0, "calib/step_q_gap": -8.906423109134032e-05, "calib/step_q_w": 0.906695652173913, "calib/step_q_w_n": 1380.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 720.05859375, "completions/mean_terminated_length": 737.3400268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 311.0, "epoch": 0.096, "grad_norm": 0.12898504734039307, "learning_rate": 3.0833333333333336e-06, "loss": -0.0053, "num_tokens": 22565121.0, "reward": 1.091796875, "reward_std": 0.24940425157546997, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.96484375, "step": 90 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 10.08203125, "calib/ece": 0.3303252032520325, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0016005204944699036, "calib/mean_conf": 0.9766666666666667, "calib/mu_c": 0.9772327044025155, "calib/mu_w": 0.9756321839080456, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3303252032520325, "calib/std_conf": 0.013624861700896784, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.909741935483871, "calib/step_q_c_n": 1550.0, "calib/step_q_gap": -0.0014413816839273785, "calib/step_q_w": 0.9111833171677983, "calib/step_q_w_n": 1031.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2841.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 778.375, "completions/mean_terminated_length": 790.730224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 439.0, "epoch": 0.09706666666666666, "grad_norm": 0.14882391691207886, "learning_rate": 3.055555555555556e-06, "loss": -0.0026, "num_tokens": 22872097.0, "reward": 1.1015625, "reward_std": 0.2827257513999939, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.9609375, "step": 91 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 10.078125, "calib/ece": 0.3375396825396826, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0014620162446249418, "calib/mean_conf": 0.9764285714285714, "calib/mu_c": 0.9769565217391304, "calib/mu_w": 0.9754945054945054, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3375396825396826, "calib/std_conf": 0.013999109139132488, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9123418491484184, "calib/step_q_c_n": 1644.0, "calib/step_q_gap": 0.002165567097136445, "calib/step_q_w": 0.910176282051282, "calib/step_q_w_n": 936.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2523.0, "completions/max_terminated_length": 2523.0, "completions/mean_length": 685.27734375, "completions/mean_terminated_length": 690.6732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 331.0, "epoch": 0.09813333333333334, "grad_norm": 0.1833995282649994, "learning_rate": 3.0277777777777776e-06, "loss": -0.0212, "num_tokens": 23154248.0, "reward": 1.115234375, "reward_std": 0.2714332938194275, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.97265625, "step": 92 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 11.328125, "calib/ece": 0.4156275303643724, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0010711430855315784, "calib/mean_conf": 0.9783805668016193, "calib/mu_c": 0.9788489208633092, "calib/mu_w": 0.9777777777777776, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4156275303643724, "calib/std_conf": 0.013845917084890089, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.913076923076923, "calib/step_q_c_n": 1443.0, "calib/step_q_gap": 0.0008188585607941112, "calib/step_q_w": 0.9122580645161289, "calib/step_q_w_n": 1457.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 710.171875, "completions/mean_terminated_length": 724.3187255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 317.0, "epoch": 0.0992, "grad_norm": 0.1591537445783615, "learning_rate": 3e-06, "loss": 0.0032, "num_tokens": 23441828.0, "reward": 1.025390625, "reward_std": 0.36088210344314575, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.96484375, "step": 93 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 9.3515625, "calib/ece": 0.3926360000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.002385773726966245, "calib/mean_conf": 0.9806360000000001, "calib/mu_c": 0.9796530612244897, "calib/mu_w": 0.9820388349514559, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3926360000000001, "calib/std_conf": 0.013215124062981788, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9103254310344827, "calib/step_q_c_n": 1392.0, "calib/step_q_gap": 0.0039132553857802455, "calib/step_q_w": 0.9064121756487025, "calib/step_q_w_n": 1002.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2695.0, "completions/max_terminated_length": 2695.0, "completions/mean_length": 676.484375, "completions/mean_terminated_length": 681.81103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 361.0, "epoch": 0.10026666666666667, "grad_norm": 0.15960992872714996, "learning_rate": 2.9722222222222225e-06, "loss": 0.0035, "num_tokens": 23723688.0, "reward": 1.0625, "reward_std": 0.27309247851371765, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.9765625, "step": 94 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.98046875, "calib/ece": 0.4093110236220473, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.002019930401771508, "calib/mean_conf": 0.9801771653543307, "calib/mu_c": 0.9793103448275862, "calib/mu_w": 0.9813302752293577, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4093110236220473, "calib/std_conf": 0.01225018520815402, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9075780089153045, "calib/step_q_c_n": 1346.0, "calib/step_q_gap": 0.003385773868085251, "calib/step_q_w": 0.9041922350472192, "calib/step_q_w_n": 953.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1588.0, "completions/max_terminated_length": 1588.0, "completions/mean_length": 711.0, "completions/mean_terminated_length": 713.7882690429688, "completions/min_length": 0.0, "completions/min_terminated_length": 267.0, "epoch": 0.10133333333333333, "grad_norm": 0.15578389167785645, "learning_rate": 2.944444444444445e-06, "loss": 0.0051, "num_tokens": 24011832.0, "reward": 1.060546875, "reward_std": 0.2807765603065491, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.98828125, "step": 95 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.54296875, "calib/ece": 0.23486274509803928, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0027437827225134104, "calib/mean_conf": 0.9838823529411765, "calib/mu_c": 0.9831937172774867, "calib/mu_w": 0.9859375000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23486274509803928, "calib/std_conf": 0.010674593363837137, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9067102803738317, "calib/step_q_c_n": 1605.0, "calib/step_q_gap": -0.002155699007611589, "calib/step_q_w": 0.9088659793814433, "calib/step_q_w_n": 582.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2198.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 633.14453125, "completions/mean_terminated_length": 633.14453125, "completions/min_length": 260.0, "completions/min_terminated_length": 260.0, "epoch": 0.1024, "grad_norm": 0.16016392409801483, "learning_rate": 2.916666666666667e-06, "loss": 0.0198, "num_tokens": 24279733.0, "reward": 1.244140625, "reward_std": 0.17649205029010773, "rewards/accuracy_reward_step": 0.74609375, "rewards/format_reward_step": 0.99609375, "step": 96 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.6796875, "calib/ece": 0.3653937007874015, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.002666622890537651, "calib/mean_conf": 0.983503937007874, "calib/mu_c": 0.9845222929936305, "calib/mu_w": 0.9818556701030928, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3653937007874015, "calib/std_conf": 0.011769019798592162, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.908495575221239, "calib/step_q_c_n": 1356.0, "calib/step_q_gap": 0.0027219031658117787, "calib/step_q_w": 0.9057736720554272, "calib/step_q_w_n": 866.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 687.04296875, "completions/mean_terminated_length": 689.7373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 294.0, "epoch": 0.10346666666666667, "grad_norm": 0.17087474465370178, "learning_rate": 2.888888888888889e-06, "loss": 0.0099, "num_tokens": 24560688.0, "reward": 1.10546875, "reward_std": 0.2656993269920349, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.984375, "step": 97 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.8984375, "calib/ece": 0.37048192771084343, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.000945669934640736, "calib/mean_conf": 0.9849397590361446, "calib/mu_c": 0.9845751633986928, "calib/mu_w": 0.9855208333333335, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37048192771084343, "calib/std_conf": 0.011269623026294539, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9078645066273933, "calib/step_q_c_n": 1358.0, "calib/step_q_gap": 0.0008373327143498255, "calib/step_q_w": 0.9070271739130434, "calib/step_q_w_n": 920.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2856.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 717.7890625, "completions/mean_terminated_length": 723.44091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 283.0, "epoch": 0.10453333333333334, "grad_norm": 0.16385309398174286, "learning_rate": 2.861111111111111e-06, "loss": 0.0191, "num_tokens": 24850626.0, "reward": 1.0859375, "reward_std": 0.29347917437553406, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.96875, "step": 98 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.84375, "calib/ece": 0.566008064516129, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.003771367521367641, "calib/mean_conf": 0.9853629032258064, "calib/mu_c": 0.9831730769230769, "calib/mu_w": 0.9869444444444445, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.566008064516129, "calib/std_conf": 0.011317866746887967, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.906843729903537, "calib/step_q_c_n": 933.0, "calib/step_q_gap": 0.004226900451996918, "calib/step_q_w": 0.9026168294515401, "calib/step_q_w_n": 1331.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2636.0, "completions/max_terminated_length": 2636.0, "completions/mean_length": 738.0546875, "completions/mean_terminated_length": 755.76806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 310.0, "epoch": 0.1056, "grad_norm": 0.14243488013744354, "learning_rate": 2.8333333333333335e-06, "loss": -0.0466, "num_tokens": 25145368.0, "reward": 0.890625, "reward_std": 0.2674334943294525, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 0.96875, "step": 99 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 7.9921875, "calib/ece": 0.42079600000000017, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0005894332747736764, "calib/mean_conf": 0.9847960000000001, "calib/mu_c": 0.9845390070921985, "calib/mu_w": 0.9851284403669722, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42079600000000017, "calib/std_conf": 0.011173288862282235, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9026158038147138, "calib/step_q_c_n": 1101.0, "calib/step_q_gap": -0.0034847252858152267, "calib/step_q_w": 0.9061005291005291, "calib/step_q_w_n": 945.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 711.9609375, "completions/mean_terminated_length": 711.9609375, "completions/min_length": 345.0, "completions/min_terminated_length": 345.0, "epoch": 0.10666666666666667, "grad_norm": 0.16134221851825714, "learning_rate": 2.805555555555556e-06, "loss": 0.0355, "num_tokens": 25435038.0, "reward": 1.0390625, "reward_std": 0.2604951858520508, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.9765625, "step": 100 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.37109375, "calib/ece": 0.5147389558232933, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0022630147630147146, "calib/mean_conf": 0.9846184738955824, "calib/mu_c": 0.9834188034188034, "calib/mu_w": 0.9856818181818181, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5147389558232933, "calib/std_conf": 0.01144021565431309, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9019877049180327, "calib/step_q_c_n": 976.0, "calib/step_q_gap": -0.001585559863458319, "calib/step_q_w": 0.903573264781491, "calib/step_q_w_n": 1167.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2612.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 743.0703125, "completions/mean_terminated_length": 751.8814697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 289.0, "epoch": 0.10773333333333333, "grad_norm": 0.16494955122470856, "learning_rate": 2.7777777777777783e-06, "loss": 0.0291, "num_tokens": 25732256.0, "reward": 0.939453125, "reward_std": 0.3241780698299408, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.96484375, "step": 101 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.24609375, "calib/ece": 0.3281200000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.996, "calib/gap": -0.00382302892796349, "calib/mean_conf": 0.9841200000000001, "calib/mu_c": 0.9828048780487806, "calib/mu_w": 0.9866279069767441, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3281200000000001, "calib/std_conf": 0.012009396321214493, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9031399564902102, "calib/step_q_c_n": 1379.0, "calib/step_q_gap": -0.0012452894114289847, "calib/step_q_w": 0.9043852459016392, "calib/step_q_w_n": 732.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2417.0, "completions/max_terminated_length": 2417.0, "completions/mean_length": 638.1484375, "completions/mean_terminated_length": 640.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 345.0, "epoch": 0.1088, "grad_norm": 0.16382494568824768, "learning_rate": 2.7500000000000004e-06, "loss": 0.0055, "num_tokens": 26002318.0, "reward": 1.125, "reward_std": 0.22881178557872772, "rewards/accuracy_reward_step": 0.640625, "rewards/format_reward_step": 0.96875, "step": 102 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 7.57421875, "calib/ece": 0.3984337349397592, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0006250831227556652, "calib/mean_conf": 0.9847791164658636, "calib/mu_c": 0.9845205479452055, "calib/mu_w": 0.9851456310679612, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3984337349397592, "calib/std_conf": 0.009696778624122087, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9016399286987522, "calib/step_q_c_n": 1122.0, "calib/step_q_gap": 0.001933686348691066, "calib/step_q_w": 0.8997062423500611, "calib/step_q_w_n": 817.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 734.54296875, "completions/mean_terminated_length": 743.2529907226562, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.10986666666666667, "grad_norm": 0.12834714353084564, "learning_rate": 2.7222222222222224e-06, "loss": -0.0067, "num_tokens": 26294913.0, "reward": 1.056640625, "reward_std": 0.22446666657924652, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.97265625, "step": 103 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 7.81640625, "calib/ece": 0.4489803921568628, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0008554991958429348, "calib/mean_conf": 0.9862352941176471, "calib/mu_c": 0.9858394160583942, "calib/mu_w": 0.9866949152542371, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4489803921568628, "calib/std_conf": 0.009071801246426833, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9019178082191781, "calib/step_q_c_n": 1095.0, "calib/step_q_gap": -0.0005656354894311155, "calib/step_q_w": 0.9024834437086092, "calib/step_q_w_n": 906.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 672.34765625, "completions/mean_terminated_length": 672.34765625, "completions/min_length": 270.0, "completions/min_terminated_length": 270.0, "epoch": 0.11093333333333333, "grad_norm": 0.16741102933883667, "learning_rate": 2.6944444444444444e-06, "loss": 0.0343, "num_tokens": 26573714.0, "reward": 1.033203125, "reward_std": 0.24684098362922668, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.99609375, "step": 104 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.171875, "calib/ece": 0.38137254901960793, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 7.715057220025745e-05, "calib/mean_conf": 0.9852941176470589, "calib/mu_c": 0.9853246753246754, "calib/mu_w": 0.9852475247524751, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38137254901960793, "calib/std_conf": 0.009892152004507259, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.902717041800643, "calib/step_q_c_n": 1244.0, "calib/step_q_gap": -0.0008843732936965054, "calib/step_q_w": 0.9036014150943396, "calib/step_q_w_n": 848.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2202.0, "completions/max_terminated_length": 2202.0, "completions/mean_length": 737.6484375, "completions/mean_terminated_length": 740.5411987304688, "completions/min_length": 0.0, "completions/min_terminated_length": 296.0, "epoch": 0.112, "grad_norm": 0.14760896563529968, "learning_rate": 2.666666666666667e-06, "loss": 0.0258, "num_tokens": 26868312.0, "reward": 1.09765625, "reward_std": 0.20331215858459473, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.9921875, "step": 105 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.6015625, "calib/ece": 0.43822834645669284, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00318861432593065, "calib/mean_conf": 0.9854724409448818, "calib/mu_c": 0.9840287769784173, "calib/mu_w": 0.9872173913043479, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.43822834645669284, "calib/std_conf": 0.009368507113614807, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8987144259077526, "calib/step_q_c_n": 1019.0, "calib/step_q_gap": -0.004867019615440338, "calib/step_q_w": 0.903581445523193, "calib/step_q_w_n": 927.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2742.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 672.83203125, "completions/mean_terminated_length": 672.83203125, "completions/min_length": 321.0, "completions/min_terminated_length": 321.0, "epoch": 0.11306666666666666, "grad_norm": 0.18181079626083374, "learning_rate": 2.6388888888888893e-06, "loss": 0.0294, "num_tokens": 27145141.0, "reward": 1.037109375, "reward_std": 0.27421265840530396, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.98828125, "step": 106 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.1171875, "calib/ece": 0.28341269841269856, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.996031746031746, "calib/gap": -0.0014327683615816822, "calib/mean_conf": 0.9857936507936509, "calib/mu_c": 0.9853672316384182, "calib/mu_w": 0.9867999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28341269841269856, "calib/std_conf": 0.01160549778058649, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9035870331219169, "calib/step_q_c_n": 1419.0, "calib/step_q_gap": -0.0038939987445474333, "calib/step_q_w": 0.9074810318664643, "calib/step_q_w_n": 659.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2731.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 662.66796875, "completions/mean_terminated_length": 667.8858032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 329.0, "epoch": 0.11413333333333334, "grad_norm": 0.20197457075119019, "learning_rate": 2.6111111111111113e-06, "loss": -0.0018, "num_tokens": 27419400.0, "reward": 1.177734375, "reward_std": 0.3254070281982422, "rewards/accuracy_reward_step": 0.69140625, "rewards/format_reward_step": 0.97265625, "step": 107 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 8.56640625, "calib/ece": 0.2770564516129034, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0004924242424242387, "calib/mean_conf": 0.9867338709677421, "calib/mu_c": 0.986590909090909, "calib/mu_w": 0.9870833333333332, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2770564516129034, "calib/std_conf": 0.009170085368237865, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.906232782369146, "calib/step_q_c_n": 1452.0, "calib/step_q_gap": -0.00043388429752055924, "calib/step_q_w": 0.9066666666666665, "calib/step_q_w_n": 741.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 717.0625, "completions/mean_terminated_length": 728.4444580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 261.0, "epoch": 0.1152, "grad_norm": 0.16753298044204712, "learning_rate": 2.5833333333333337e-06, "loss": -0.0362, "num_tokens": 27706200.0, "reward": 1.16796875, "reward_std": 0.30900561809539795, "rewards/accuracy_reward_step": 0.6875, "rewards/format_reward_step": 0.9609375, "step": 108 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 9.51171875, "calib/ece": 0.48186234817813767, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0010026229508198758, "calib/mean_conf": 0.9879352226720648, "calib/mu_c": 0.98744, "calib/mu_w": 0.9884426229508199, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.48186234817813767, "calib/std_conf": 0.007805054824974812, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9067155963302752, "calib/step_q_c_n": 1090.0, "calib/step_q_gap": -0.00024351147641632487, "calib/step_q_w": 0.9069591078066915, "calib/step_q_w_n": 1345.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 727.7890625, "completions/mean_terminated_length": 739.34130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 371.0, "epoch": 0.11626666666666667, "grad_norm": 0.14441752433776855, "learning_rate": 2.5555555555555557e-06, "loss": -0.0015, "num_tokens": 27997114.0, "reward": 0.96875, "reward_std": 0.226576566696167, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.9609375, "step": 109 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 8.71875, "calib/ece": 0.39039370078740154, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006475748194016973, "calib/mean_conf": 0.9888188976377953, "calib/mu_c": 0.9890789473684211, "calib/mu_w": 0.9884313725490194, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.39039370078740154, "calib/std_conf": 0.007275692810407256, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.9081130507065669, "calib/step_q_c_n": 1203.0, "calib/step_q_gap": -0.00981697844795204, "calib/step_q_w": 0.9179300291545189, "calib/step_q_w_n": 1029.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1968.0, "completions/max_terminated_length": 1968.0, "completions/mean_length": 663.86328125, "completions/mean_terminated_length": 666.4667358398438, "completions/min_length": 0.0, "completions/min_terminated_length": 261.0, "epoch": 0.11733333333333333, "grad_norm": 0.15150228142738342, "learning_rate": 2.5277777777777778e-06, "loss": 0.0132, "num_tokens": 28271983.0, "reward": 1.0859375, "reward_std": 0.25357189774513245, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.984375, "step": 110 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.94921875, "calib/ece": 0.3785943775100403, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0012920510037981447, "calib/mean_conf": 0.9890361445783135, "calib/mu_c": 0.9895394736842105, "calib/mu_w": 0.9882474226804123, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3785943775100403, "calib/std_conf": 0.007436120112295035, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9105534495830174, "calib/step_q_c_n": 1319.0, "calib/step_q_gap": -0.007152311733854999, "calib/step_q_w": 0.9177057613168724, "calib/step_q_w_n": 972.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2780.0, "completions/max_terminated_length": 2780.0, "completions/mean_length": 717.40234375, "completions/mean_terminated_length": 725.9091186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 338.0, "epoch": 0.1184, "grad_norm": 0.1705169975757599, "learning_rate": 2.5e-06, "loss": -0.0176, "num_tokens": 28563046.0, "reward": 1.080078125, "reward_std": 0.33822542428970337, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.97265625, "step": 111 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.578125, "calib/ece": 0.39368000000000014, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0010352847365274753, "calib/mean_conf": 0.9896800000000001, "calib/mu_c": 0.9892617449664428, "calib/mu_w": 0.9902970297029703, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39368000000000014, "calib/std_conf": 0.0075297808733056845, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9101587301587303, "calib/step_q_c_n": 1323.0, "calib/step_q_gap": -0.001021109474717674, "calib/step_q_w": 0.911179839633448, "calib/step_q_w_n": 873.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2876.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 707.21875, "completions/mean_terminated_length": 721.3068237304688, "completions/min_length": 0.0, "completions/min_terminated_length": 360.0, "epoch": 0.11946666666666667, "grad_norm": 0.16528332233428955, "learning_rate": 2.4722222222222226e-06, "loss": -0.028, "num_tokens": 28852014.0, "reward": 1.0703125, "reward_std": 0.23935158550739288, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.9765625, "step": 112 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 9.5703125, "calib/ece": 0.40708661417322833, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0005673125956144887, "calib/mean_conf": 0.989763779527559, "calib/mu_c": 0.989527027027027, "calib/mu_w": 0.9900943396226415, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40708661417322833, "calib/std_conf": 0.007629673415280169, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9150035486160397, "calib/step_q_c_n": 1409.0, "calib/step_q_gap": -0.00504448212363362, "calib/step_q_w": 0.9200480307396733, "calib/step_q_w_n": 1041.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2690.0, "completions/max_terminated_length": 2690.0, "completions/mean_length": 689.4140625, "completions/mean_terminated_length": 692.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 378.0, "epoch": 0.12053333333333334, "grad_norm": 0.17403432726860046, "learning_rate": 2.4444444444444447e-06, "loss": 0.0031, "num_tokens": 29133704.0, "reward": 1.07421875, "reward_std": 0.268023818731308, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9921875, "step": 113 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 10.5546875, "calib/ece": 0.3272936507936508, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0003670306445929805, "calib/mean_conf": 0.9899920634920635, "calib/mu_c": 0.9898682634730539, "calib/mu_w": 0.9902352941176469, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3272936507936508, "calib/std_conf": 0.0062246259934130655, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9154449938195304, "calib/step_q_c_n": 1618.0, "calib/step_q_gap": -0.017676777398181853, "calib/step_q_w": 0.9331217712177122, "calib/step_q_w_n": 1084.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3060.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 676.90625, "completions/mean_terminated_length": 682.2362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 282.0, "epoch": 0.1216, "grad_norm": 0.1270322948694229, "learning_rate": 2.4166666666666667e-06, "loss": 0.0156, "num_tokens": 29412016.0, "reward": 1.14453125, "reward_std": 0.1828223466873169, "rewards/accuracy_reward_step": 0.65234375, "rewards/format_reward_step": 0.984375, "step": 114 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.7109375, "calib/ece": 0.46885375494071146, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0001308539944905318, "calib/mean_conf": 0.9905928853754941, "calib/mu_c": 0.990530303030303, "calib/mu_w": 0.9906611570247935, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.46885375494071146, "calib/std_conf": 0.005696912413070543, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9160295395308427, "calib/step_q_c_n": 1151.0, "calib/step_q_gap": -0.00796296983245337, "calib/step_q_w": 0.923992509363296, "calib/step_q_w_n": 1335.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2682.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 703.52734375, "completions/mean_terminated_length": 706.2863159179688, "completions/min_length": 0.0, "completions/min_terminated_length": 368.0, "epoch": 0.12266666666666666, "grad_norm": 0.16146261990070343, "learning_rate": 2.388888888888889e-06, "loss": 0.0307, "num_tokens": 29697383.0, "reward": 1.0078125, "reward_std": 0.23582719266414642, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.984375, "step": 115 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.7890625, "calib/ece": 0.399436, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00020747217806049711, "calib/mean_conf": 0.991436, "calib/mu_c": 0.9913513513513511, "calib/mu_w": 0.9915588235294116, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.399436, "calib/std_conf": 0.005609091191984675, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9153267045454547, "calib/step_q_c_n": 1408.0, "calib/step_q_gap": -0.020064728246273478, "calib/step_q_w": 0.9353914327917282, "calib/step_q_w_n": 1354.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2712.0, "completions/max_terminated_length": 2712.0, "completions/mean_length": 726.54296875, "completions/mean_terminated_length": 738.075439453125, "completions/min_length": 0.0, "completions/min_terminated_length": 292.0, "epoch": 0.12373333333333333, "grad_norm": 0.15040576457977295, "learning_rate": 2.361111111111111e-06, "loss": 0.0051, "num_tokens": 29987898.0, "reward": 1.06640625, "reward_std": 0.30778148770332336, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9765625, "step": 116 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 10.1953125, "calib/ece": 0.490796, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0008080000000000309, "calib/mean_conf": 0.990796, "calib/mu_c": 0.9911999999999999, "calib/mu_w": 0.9903919999999998, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.490796, "calib/std_conf": 0.00514688099726427, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9176000000000002, "calib/step_q_c_n": 1150.0, "calib/step_q_gap": -0.008115068493150357, "calib/step_q_w": 0.9257150684931506, "calib/step_q_w_n": 1460.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 698.19140625, "completions/mean_terminated_length": 712.099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 345.0, "epoch": 0.1248, "grad_norm": 0.16612644493579865, "learning_rate": 2.3333333333333336e-06, "loss": -0.0003, "num_tokens": 30273235.0, "reward": 0.9765625, "reward_std": 0.3108536899089813, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.9765625, "step": 117 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 10.29296875, "calib/ece": 0.43342231075697224, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0013442728442730756, "calib/mean_conf": 0.991191235059761, "calib/mu_c": 0.9917857142857143, "calib/mu_w": 0.9904414414414412, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.43342231075697224, "calib/std_conf": 0.004648960264656658, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.911376488095238, "calib/step_q_c_n": 1344.0, "calib/step_q_gap": -0.012913209813360194, "calib/step_q_w": 0.9242896979085982, "calib/step_q_w_n": 1291.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1806.0, "completions/max_terminated_length": 1806.0, "completions/mean_length": 679.640625, "completions/mean_terminated_length": 693.1793212890625, "completions/min_length": 0.0, "completions/min_terminated_length": 334.0, "epoch": 0.12586666666666665, "grad_norm": 0.14906100928783417, "learning_rate": 2.305555555555556e-06, "loss": -0.0502, "num_tokens": 30551231.0, "reward": 1.037109375, "reward_std": 0.2077845185995102, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.98046875, "step": 118 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.76953125, "calib/ece": 0.4146774193548388, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 9.057609057583349e-05, "calib/mean_conf": 0.9912903225806453, "calib/mu_c": 0.9913286713286711, "calib/mu_w": 0.9912380952380953, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4146774193548388, "calib/std_conf": 0.0036956255604482613, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9124657534246575, "calib/step_q_c_n": 1168.0, "calib/step_q_gap": 0.00017768140665308518, "calib/step_q_w": 0.9122880720180044, "calib/step_q_w_n": 1333.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 726.0859375, "completions/mean_terminated_length": 746.4979858398438, "completions/min_length": 0.0, "completions/min_terminated_length": 319.0, "epoch": 0.12693333333333334, "grad_norm": 0.15550687909126282, "learning_rate": 2.277777777777778e-06, "loss": -0.0534, "num_tokens": 30842173.0, "reward": 1.04296875, "reward_std": 0.2845311462879181, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.96875, "step": 119 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 9.30078125, "calib/ece": 0.3415243741765482, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960474308300395, "calib/gap": -0.0036297979797981217, "calib/mean_conf": 0.9884281949934123, "calib/mu_c": 0.9871656565656564, "calib/mu_w": 0.9907954545454545, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3388893280632412, "calib/std_conf": 0.04153036208504551, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9176109643328929, "calib/step_q_c_n": 1514.0, "calib/step_q_gap": 0.040852025463227304, "calib/step_q_w": 0.8767589388696656, "calib/step_q_w_n": 867.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 659.55859375, "completions/mean_terminated_length": 667.3794555664062, "completions/min_length": 0.0, "completions/min_terminated_length": 374.0, "epoch": 0.128, "grad_norm": 0.13183537125587463, "learning_rate": 2.25e-06, "loss": -0.0255, "num_tokens": 31117708.0, "reward": 1.138671875, "reward_std": 0.18556472659111023, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.98828125, "step": 120 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 8.5078125, "calib/ece": 0.4200396825396827, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00013888888888913264, "calib/mean_conf": 0.9914682539682541, "calib/mu_c": 0.9915277777777778, "calib/mu_w": 0.9913888888888887, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4200396825396827, "calib/std_conf": 0.003649715161611516, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9121318144833198, "calib/step_q_c_n": 1229.0, "calib/step_q_gap": -0.0012612308275337103, "calib/step_q_w": 0.9133930453108535, "calib/step_q_w_n": 949.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 719.19140625, "completions/mean_terminated_length": 724.8543090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 310.0, "epoch": 0.12906666666666666, "grad_norm": 0.17723466455936432, "learning_rate": 2.222222222222222e-06, "loss": 0.0058, "num_tokens": 31406877.0, "reward": 1.0546875, "reward_std": 0.3372430205345154, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.984375, "step": 121 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.2734375, "calib/ece": 0.34686400000000017, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0005077814222903543, "calib/mean_conf": 0.9908640000000002, "calib/mu_c": 0.9906832298136646, "calib/mu_w": 0.991191011235955, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34686400000000017, "calib/std_conf": 0.0038664588449898217, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9112178517397883, "calib/step_q_c_n": 1322.0, "calib/step_q_gap": -0.0012720980089555578, "calib/step_q_w": 0.9124899497487439, "calib/step_q_w_n": 796.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2680.0, "completions/max_terminated_length": 2680.0, "completions/mean_length": 700.76953125, "completions/mean_terminated_length": 706.2874145507812, "completions/min_length": 0.0, "completions/min_terminated_length": 350.0, "epoch": 0.13013333333333332, "grad_norm": 0.16055411100387573, "learning_rate": 2.1944444444444445e-06, "loss": 0.0187, "num_tokens": 31693618.0, "reward": 1.1171875, "reward_std": 0.21959704160690308, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.9765625, "step": 122 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.6484375, "calib/ece": 0.432988047808765, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00025804375804361346, "calib/mean_conf": 0.9907569721115538, "calib/mu_c": 0.9906428571428572, "calib/mu_w": 0.9909009009009008, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.432988047808765, "calib/std_conf": 0.0029309281526294084, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9100867302688638, "calib/step_q_c_n": 1153.0, "calib/step_q_gap": -0.0025051641703445204, "calib/step_q_w": 0.9125918944392083, "calib/step_q_w_n": 1061.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2605.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 735.09765625, "completions/mean_terminated_length": 746.7659301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 334.0, "epoch": 0.1312, "grad_norm": 0.1874084323644638, "learning_rate": 2.166666666666667e-06, "loss": -0.0302, "num_tokens": 31987091.0, "reward": 1.03515625, "reward_std": 0.3761196434497833, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.9765625, "step": 123 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.87109375, "calib/ece": 0.2989723320158103, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00014065934065921049, "calib/mean_conf": 0.9906719367588933, "calib/mu_c": 0.9906285714285712, "calib/mu_w": 0.9907692307692304, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2989723320158103, "calib/std_conf": 0.002801588165481323, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9077858176555716, "calib/step_q_c_n": 1382.0, "calib/step_q_gap": -0.0005396167836069221, "calib/step_q_w": 0.9083254344391786, "calib/step_q_w_n": 633.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2169.0, "completions/max_terminated_length": 2169.0, "completions/mean_length": 693.8984375, "completions/mean_terminated_length": 702.1265258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 321.0, "epoch": 0.13226666666666667, "grad_norm": 0.13662093877792358, "learning_rate": 2.138888888888889e-06, "loss": -0.0311, "num_tokens": 32271545.0, "reward": 1.177734375, "reward_std": 0.21031202375888824, "rewards/accuracy_reward_step": 0.68359375, "rewards/format_reward_step": 0.98828125, "step": 124 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.41015625, "calib/ece": 0.4788709677419356, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005986854948915044, "calib/mean_conf": 0.990967741935484, "calib/mu_c": 0.9912598425196849, "calib/mu_w": 0.9906611570247934, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4788709677419356, "calib/std_conf": 0.003217731829558068, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9117524752475249, "calib/step_q_c_n": 1010.0, "calib/step_q_gap": -0.0020358012179169105, "calib/step_q_w": 0.9137882764654418, "calib/step_q_w_n": 1143.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2538.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 723.49609375, "completions/mean_terminated_length": 734.980224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 390.0, "epoch": 0.13333333333333333, "grad_norm": 0.16686882078647614, "learning_rate": 2.1111111111111114e-06, "loss": -0.0043, "num_tokens": 32561568.0, "reward": 0.978515625, "reward_std": 0.2920733094215393, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.96484375, "step": 125 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 9.3515625, "calib/ece": 0.497404858299595, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -4.052459016390397e-05, "calib/mean_conf": 0.9913319838056679, "calib/mu_c": 0.991311475409836, "calib/mu_w": 0.9913519999999999, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.497404858299595, "calib/std_conf": 0.004045679120530505, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9121834862385321, "calib/step_q_c_n": 1090.0, "calib/step_q_gap": 0.011021676422581117, "calib/step_q_w": 0.901161809815951, "calib/step_q_w_n": 1304.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2704.0, "completions/max_terminated_length": 2704.0, "completions/mean_length": 744.265625, "completions/mean_terminated_length": 759.0916748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 319.0, "epoch": 0.1344, "grad_norm": 0.1326979398727417, "learning_rate": 2.0833333333333334e-06, "loss": -0.0206, "num_tokens": 32857564.0, "reward": 0.958984375, "reward_std": 0.2162768691778183, "rewards/accuracy_reward_step": 0.4765625, "rewards/format_reward_step": 0.96484375, "step": 126 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 8.7265625, "calib/ece": 0.4245119047619048, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.001260601783537596, "calib/mean_conf": 0.9919722222222223, "calib/mu_c": 0.9925174825174824, "calib/mu_w": 0.9912568807339448, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4245119047619048, "calib/std_conf": 0.003968529752321603, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9125038520801233, "calib/step_q_c_n": 1298.0, "calib/step_q_gap": 0.011049792251063506, "calib/step_q_w": 0.9014540598290598, "calib/step_q_w_n": 936.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 705.546875, "completions/mean_terminated_length": 708.3137817382812, "completions/min_length": 0.0, "completions/min_terminated_length": 384.0, "epoch": 0.13546666666666668, "grad_norm": 0.15588776767253876, "learning_rate": 2.0555555555555555e-06, "loss": 0.0308, "num_tokens": 33141856.0, "reward": 1.05078125, "reward_std": 0.22860543429851532, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.984375, "step": 127 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.375, "calib/ece": 0.43579600000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.996, "calib/gap": 0.009125129399585874, "calib/mean_conf": 0.987796, "calib/mu_c": 0.9918840579710144, "calib/mu_w": 0.9827589285714285, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.43579600000000007, "calib/std_conf": 0.06272691913365425, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9094268077601411, "calib/step_q_c_n": 1134.0, "calib/step_q_gap": 0.05834759983934912, "calib/step_q_w": 0.851079207920792, "calib/step_q_w_n": 1010.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 710.8984375, "completions/mean_terminated_length": 722.1825561523438, "completions/min_length": 0.0, "completions/min_terminated_length": 326.0, "epoch": 0.13653333333333334, "grad_norm": 0.16684812307357788, "learning_rate": 2.027777777777778e-06, "loss": -0.0061, "num_tokens": 33430510.0, "reward": 1.02734375, "reward_std": 0.29256534576416016, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.9765625, "step": 128 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 8.6875, "calib/ece": 0.42376640419947514, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": -0.00461604977327823, "calib/mean_conf": 0.9893832020997375, "calib/mu_c": 0.9874022988505748, "calib/mu_w": 0.9920183486238531, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42114173228346463, "calib/std_conf": 0.04155039059293514, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9116179952644041, "calib/step_q_c_n": 1267.0, "calib/step_q_gap": 0.00417807885897048, "calib/step_q_w": 0.9074399164054336, "calib/step_q_w_n": 957.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2578.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 669.89453125, "completions/mean_terminated_length": 669.89453125, "completions/min_length": 317.0, "completions/min_terminated_length": 317.0, "epoch": 0.1376, "grad_norm": 0.1644672006368637, "learning_rate": 2.0000000000000003e-06, "loss": 0.0153, "num_tokens": 33704387.0, "reward": 1.060546875, "reward_std": 0.23660747706890106, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.98828125, "step": 129 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.1875, "calib/ece": 0.38050980392156863, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0002505827505827618, "calib/mean_conf": 0.9922745098039216, "calib/mu_c": 0.9923717948717948, "calib/mu_w": 0.9921212121212121, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38050980392156863, "calib/std_conf": 0.00446370298827677, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9108261211644375, "calib/step_q_c_n": 1271.0, "calib/step_q_gap": 0.0011836969220132554, "calib/step_q_w": 0.9096424242424243, "calib/step_q_w_n": 825.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2649.0, "completions/max_terminated_length": 2649.0, "completions/mean_length": 656.65234375, "completions/mean_terminated_length": 656.65234375, "completions/min_length": 336.0, "completions/min_terminated_length": 336.0, "epoch": 0.13866666666666666, "grad_norm": 0.13385489583015442, "learning_rate": 1.9722222222222224e-06, "loss": 0.0115, "num_tokens": 33977778.0, "reward": 1.107421875, "reward_std": 0.15861839056015015, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.99609375, "step": 130 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 8.703125, "calib/ece": 0.5781274900398408, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.1020408163293673e-05, "calib/mean_conf": 0.9924701195219124, "calib/mu_c": 0.9924999999999999, "calib/mu_w": 0.9924489795918366, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5781274900398408, "calib/std_conf": 0.004835347067234673, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9095091324200913, "calib/step_q_c_n": 876.0, "calib/step_q_gap": 0.004057948988138604, "calib/step_q_w": 0.9054511834319527, "calib/step_q_w_n": 1352.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2442.0, "completions/max_terminated_length": 2442.0, "completions/mean_length": 670.1796875, "completions/mean_terminated_length": 680.8175048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 405.0, "epoch": 0.13973333333333332, "grad_norm": 0.14848309755325317, "learning_rate": 1.944444444444445e-06, "loss": -0.0111, "num_tokens": 34255552.0, "reward": 0.89453125, "reward_std": 0.17193031311035156, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 0.9765625, "step": 131 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.59375, "calib/ece": 0.35124409448818905, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0014529764713813176, "calib/mean_conf": 0.992976377952756, "calib/mu_c": 0.9934969325153374, "calib/mu_w": 0.9920439560439561, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35124409448818905, "calib/std_conf": 0.0052817686236865434, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9092824773413897, "calib/step_q_c_n": 1324.0, "calib/step_q_gap": 0.03022197505828461, "calib/step_q_w": 0.8790605022831051, "calib/step_q_w_n": 876.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 699.015625, "completions/mean_terminated_length": 701.7568969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 326.0, "epoch": 0.1408, "grad_norm": 0.1599433869123459, "learning_rate": 1.916666666666667e-06, "loss": 0.0201, "num_tokens": 34540092.0, "reward": 1.1328125, "reward_std": 0.24816462397575378, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.9921875, "step": 132 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 8.265625, "calib/ece": 0.5425099601593626, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9960159362549801, "calib/gap": 0.0010279594715916218, "calib/mean_conf": 0.9927091633466136, "calib/mu_c": 0.9932743362831858, "calib/mu_w": 0.9922463768115942, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5425099601593626, "calib/std_conf": 0.007510356441719955, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9057649667405765, "calib/step_q_c_n": 902.0, "calib/step_q_gap": -0.0019483775757331845, "calib/step_q_w": 0.9077133443163097, "calib/step_q_w_n": 1214.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2653.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 755.53125, "completions/mean_terminated_length": 767.5238647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 318.0, "epoch": 0.14186666666666667, "grad_norm": 0.17682072520256042, "learning_rate": 1.888888888888889e-06, "loss": 0.0028, "num_tokens": 34839852.0, "reward": 0.931640625, "reward_std": 0.30837512016296387, "rewards/accuracy_reward_step": 0.44140625, "rewards/format_reward_step": 0.98046875, "step": 133 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.7265625, "calib/ece": 0.4609274193548387, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00016980146290501708, "calib/mean_conf": 0.9931854838709677, "calib/mu_c": 0.9931060606060604, "calib/mu_w": 0.9932758620689655, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4609274193548387, "calib/std_conf": 0.005229980163232401, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9068285714285715, "calib/step_q_c_n": 1050.0, "calib/step_q_gap": -0.003467036679536517, "calib/step_q_w": 0.9102956081081081, "calib/step_q_w_n": 1184.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2262.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 744.5546875, "completions/mean_terminated_length": 762.4240112304688, "completions/min_length": 0.0, "completions/min_terminated_length": 390.0, "epoch": 0.14293333333333333, "grad_norm": 0.17197610437870026, "learning_rate": 1.8611111111111113e-06, "loss": -0.028, "num_tokens": 35139410.0, "reward": 1.0, "reward_std": 0.29847827553749084, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.96875, "step": 134 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.828125, "calib/ece": 0.38011811023622055, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005075876504448962, "calib/mean_conf": 0.9942913385826773, "calib/mu_c": 0.9944871794871796, "calib/mu_w": 0.9939795918367347, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.38011811023622055, "calib/std_conf": 0.004949525128292414, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9111734317343174, "calib/step_q_c_n": 1355.0, "calib/step_q_gap": 0.001626470408350622, "calib/step_q_w": 0.9095469613259668, "calib/step_q_w_n": 905.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2160.0, "completions/max_terminated_length": 2160.0, "completions/mean_length": 747.48828125, "completions/mean_terminated_length": 750.419677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 409.0, "epoch": 0.144, "grad_norm": 0.18103228509426117, "learning_rate": 1.8333333333333333e-06, "loss": 0.0169, "num_tokens": 35436647.0, "reward": 1.103515625, "reward_std": 0.2690998911857605, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.98828125, "step": 135 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 9.12890625, "calib/ece": 0.44757936507936513, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0014836003051106328, "calib/mean_conf": 0.9951984126984128, "calib/mu_c": 0.9958695652173913, "calib/mu_w": 0.9943859649122807, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.44757936507936513, "calib/std_conf": 0.004996061689081574, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9105586152635721, "calib/step_q_c_n": 1271.0, "calib/step_q_gap": 0.016046420141620832, "calib/step_q_w": 0.8945121951219512, "calib/step_q_w_n": 1066.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2100.0, "completions/max_terminated_length": 2100.0, "completions/mean_length": 711.12109375, "completions/mean_terminated_length": 722.4087524414062, "completions/min_length": 0.0, "completions/min_terminated_length": 383.0, "epoch": 0.14506666666666668, "grad_norm": 0.17702247202396393, "learning_rate": 1.8055555555555557e-06, "loss": -0.024, "num_tokens": 35727182.0, "reward": 1.029296875, "reward_std": 0.28243717551231384, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.98046875, "step": 136 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 9.44140625, "calib/ece": 0.4384196547144754, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9960159362549801, "calib/gap": 0.006667951353203083, "calib/mean_conf": 0.9922045152722444, "calib/mu_c": 0.9951798561151078, "calib/mu_w": 0.9885119047619048, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4384196547144754, "calib/std_conf": 0.04202500106425627, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9080967741935484, "calib/step_q_c_n": 1240.0, "calib/step_q_gap": 0.01070510044673445, "calib/step_q_w": 0.897391673746814, "calib/step_q_w_n": 1177.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2550.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 715.8984375, "completions/mean_terminated_length": 724.3873901367188, "completions/min_length": 0.0, "completions/min_terminated_length": 388.0, "epoch": 0.14613333333333334, "grad_norm": 0.16152194142341614, "learning_rate": 1.777777777777778e-06, "loss": 0.0062, "num_tokens": 36017436.0, "reward": 1.033203125, "reward_std": 0.22543761134147644, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.98046875, "step": 137 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 9.359375, "calib/ece": 0.3457661290322581, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00015063896623102035, "calib/mean_conf": 0.9949596774193549, "calib/mu_c": 0.9949068322981367, "calib/mu_w": 0.9950574712643677, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3457661290322581, "calib/std_conf": 0.0049998374063053444, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9099426111908178, "calib/step_q_c_n": 1394.0, "calib/step_q_gap": 0.03896456727864206, "calib/step_q_w": 0.8709780439121757, "calib/step_q_w_n": 1002.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2179.0, "completions/max_terminated_length": 2179.0, "completions/mean_length": 723.10546875, "completions/mean_terminated_length": 737.510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 316.0, "epoch": 0.1472, "grad_norm": 0.18175871670246124, "learning_rate": 1.75e-06, "loss": -0.0157, "num_tokens": 36306887.0, "reward": 1.11328125, "reward_std": 0.29471495747566223, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.96875, "step": 138 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.90234375, "calib/ece": 0.3306048387096775, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0007601314348301136, "calib/mean_conf": 0.9959274193548387, "calib/mu_c": 0.9961818181818182, "calib/mu_w": 0.995421686746988, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3306048387096775, "calib/std_conf": 0.00499463161335987, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9098336948662328, "calib/step_q_c_n": 1383.0, "calib/step_q_gap": 0.009576998437661355, "calib/step_q_w": 0.9002566964285714, "calib/step_q_w_n": 896.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2653.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 657.640625, "completions/mean_terminated_length": 668.0794067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 343.0, "epoch": 0.14826666666666666, "grad_norm": 0.17501947283744812, "learning_rate": 1.7222222222222224e-06, "loss": -0.0145, "num_tokens": 36578339.0, "reward": 1.12890625, "reward_std": 0.24773260951042175, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.96875, "step": 139 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 9.14453125, "calib/ece": 0.29529880478087644, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0004363636363635681, "calib/mean_conf": 0.9964940239043825, "calib/mu_c": 0.9963636363636365, "calib/mu_w": 0.9968, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29529880478087644, "calib/std_conf": 0.004771571289746575, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9106408544726302, "calib/step_q_c_n": 1498.0, "calib/step_q_gap": -7.326177885258467e-05, "calib/step_q_w": 0.9107141162514828, "calib/step_q_w_n": 843.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 686.55078125, "completions/mean_terminated_length": 697.4484252929688, "completions/min_length": 0.0, "completions/min_terminated_length": 355.0, "epoch": 0.14933333333333335, "grad_norm": 0.16100062429904938, "learning_rate": 1.6944444444444446e-06, "loss": -0.0091, "num_tokens": 36859112.0, "reward": 1.177734375, "reward_std": 0.2540324330329895, "rewards/accuracy_reward_step": 0.6875, "rewards/format_reward_step": 0.98046875, "step": 140 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.359375, "calib/ece": 0.39397637795275586, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0008250825082509738, "calib/mean_conf": 0.9963385826771654, "calib/mu_c": 0.9966666666666667, "calib/mu_w": 0.9958415841584157, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39397637795275586, "calib/std_conf": 0.004898530186820943, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9082588235294118, "calib/step_q_c_n": 1275.0, "calib/step_q_gap": -0.00031921115266919653, "calib/step_q_w": 0.908578034682081, "calib/step_q_w_n": 865.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2985.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 741.88671875, "completions/mean_terminated_length": 744.796142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 344.0, "epoch": 0.1504, "grad_norm": 0.15197478234767914, "learning_rate": 1.6666666666666667e-06, "loss": 0.0091, "num_tokens": 37156131.0, "reward": 1.09375, "reward_std": 0.22331982851028442, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.9921875, "step": 141 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 8.453125, "calib/ece": 0.4255204216073781, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960474308300395, "calib/gap": -0.004131545338441844, "calib/mean_conf": 0.993372859025033, "calib/mu_c": 0.9916091954022991, "calib/mu_w": 0.9957407407407409, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42288537549407107, "calib/std_conf": 0.041865252417557876, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.911143818334735, "calib/step_q_c_n": 1189.0, "calib/step_q_gap": 0.00012843371935034043, "calib/step_q_w": 0.9110153846153847, "calib/step_q_w_n": 975.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1525.0, "completions/max_terminated_length": 1525.0, "completions/mean_length": 720.359375, "completions/mean_terminated_length": 726.031494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 337.0, "epoch": 0.15146666666666667, "grad_norm": 0.15058152377605438, "learning_rate": 1.638888888888889e-06, "loss": -0.0109, "num_tokens": 37445703.0, "reward": 1.064453125, "reward_std": 0.20312634110450745, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.98828125, "step": 142 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.65234375, "calib/ece": 0.4366532258064517, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0004573955514487471, "calib/mean_conf": 0.9971370967741936, "calib/mu_c": 0.9973381294964029, "calib/mu_w": 0.9968807339449541, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4366532258064517, "calib/std_conf": 0.004520267401131438, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9136747967479675, "calib/step_q_c_n": 1230.0, "calib/step_q_gap": 0.008420989641368504, "calib/step_q_w": 0.905253807106599, "calib/step_q_w_n": 985.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2607.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 722.49609375, "completions/mean_terminated_length": 733.96435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 348.0, "epoch": 0.15253333333333333, "grad_norm": 0.19313447177410126, "learning_rate": 1.6111111111111113e-06, "loss": 0.0046, "num_tokens": 37737998.0, "reward": 1.02734375, "reward_std": 0.2798112630844116, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.96875, "step": 143 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 8.4140625, "calib/ece": 0.27803921568627454, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00039845173041908044, "calib/mean_conf": 0.995686274509804, "calib/mu_c": 0.9955737704918033, "calib/mu_w": 0.9959722222222224, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27803921568627454, "calib/std_conf": 0.004952678800123569, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9118355263157897, "calib/step_q_c_n": 1520.0, "calib/step_q_gap": 8.47376722564741e-05, "calib/step_q_w": 0.9117507886435332, "calib/step_q_w_n": 634.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 728.23828125, "completions/mean_terminated_length": 728.23828125, "completions/min_length": 399.0, "completions/min_terminated_length": 399.0, "epoch": 0.1536, "grad_norm": 0.13797351717948914, "learning_rate": 1.5833333333333333e-06, "loss": 0.0165, "num_tokens": 38028555.0, "reward": 1.212890625, "reward_std": 0.14506946504116058, "rewards/accuracy_reward_step": 0.71484375, "rewards/format_reward_step": 0.99609375, "step": 144 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 9.28125, "calib/ece": 0.3081600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 8.944543828381946e-06, "calib/mean_conf": 0.99616, "calib/mu_c": 0.9961627906976744, "calib/mu_w": 0.9961538461538461, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3081600000000001, "calib/std_conf": 0.004863578929142616, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.913320184089415, "calib/step_q_c_n": 1521.0, "calib/step_q_gap": 0.001121353680058279, "calib/step_q_w": 0.9121988304093567, "calib/step_q_w_n": 855.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1635.0, "completions/max_terminated_length": 1635.0, "completions/mean_length": 718.89453125, "completions/mean_terminated_length": 736.1480102539062, "completions/min_length": 0.0, "completions/min_terminated_length": 354.0, "epoch": 0.15466666666666667, "grad_norm": 0.18962031602859497, "learning_rate": 1.5555555555555558e-06, "loss": -0.0321, "num_tokens": 38315296.0, "reward": 1.158203125, "reward_std": 0.3293054699897766, "rewards/accuracy_reward_step": 0.671875, "rewards/format_reward_step": 0.97265625, "step": 145 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 8.82421875, "calib/ece": 0.5, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0008960573476700651, "calib/mean_conf": 0.996, "calib/mu_c": 0.9964516129032257, "calib/mu_w": 0.9955555555555556, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5, "calib/std_conf": 0.00489897948556636, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9110076045627378, "calib/step_q_c_n": 1052.0, "calib/step_q_gap": 0.0008750444964575887, "calib/step_q_w": 0.9101325600662802, "calib/step_q_w_n": 1207.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2443.0, "completions/max_terminated_length": 2443.0, "completions/mean_length": 741.7421875, "completions/mean_terminated_length": 753.5159301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 366.0, "epoch": 0.15573333333333333, "grad_norm": 0.20323771238327026, "learning_rate": 1.527777777777778e-06, "loss": -0.0212, "num_tokens": 38612398.0, "reward": 0.97265625, "reward_std": 0.27729031443595886, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.9765625, "step": 146 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 8.69140625, "calib/ece": 0.5288047808764942, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.000288302079347047, "calib/mean_conf": 0.9949402390438248, "calib/mu_c": 0.9947863247863247, "calib/mu_w": 0.9950746268656717, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5288047808764942, "calib/std_conf": 0.004999642850056097, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9134854368932038, "calib/step_q_c_n": 1030.0, "calib/step_q_gap": -0.00045598570093852153, "calib/step_q_w": 0.9139414225941423, "calib/step_q_w_n": 1195.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 753.86328125, "completions/mean_terminated_length": 759.7991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 426.0, "epoch": 0.1568, "grad_norm": 0.1743156462907791, "learning_rate": 1.5e-06, "loss": 0.0223, "num_tokens": 38909067.0, "reward": 0.947265625, "reward_std": 0.20046457648277283, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.98046875, "step": 147 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 8.46484375, "calib/ece": 0.33278884462151404, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.000672572643515057, "calib/mean_conf": 0.9941434262948208, "calib/mu_c": 0.9939156626506025, "calib/mu_w": 0.9945882352941176, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33278884462151404, "calib/std_conf": 0.004926081758111154, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9119335219236209, "calib/step_q_c_n": 1414.0, "calib/step_q_gap": 0.010459418337963378, "calib/step_q_w": 0.9014741035856575, "calib/step_q_w_n": 753.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 730.67578125, "completions/mean_terminated_length": 736.4291381835938, "completions/min_length": 0.0, "completions/min_terminated_length": 406.0, "epoch": 0.15786666666666666, "grad_norm": 0.1720433533191681, "learning_rate": 1.4722222222222225e-06, "loss": 0.0099, "num_tokens": 39201232.0, "reward": 1.138671875, "reward_std": 0.21238605678081512, "rewards/accuracy_reward_step": 0.6484375, "rewards/format_reward_step": 0.98046875, "step": 148 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.140625, "calib/ece": 0.4248387096774193, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0011904288460262702, "calib/mean_conf": 0.9933870967741936, "calib/mu_c": 0.9939007092198581, "calib/mu_w": 0.9927102803738318, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4248387096774193, "calib/std_conf": 0.004732709919716523, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9122546419098143, "calib/step_q_c_n": 1131.0, "calib/step_q_gap": 0.002338587345281251, "calib/step_q_w": 0.9099160545645331, "calib/step_q_w_n": 953.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2728.0, "completions/max_terminated_length": 2728.0, "completions/mean_length": 774.7109375, "completions/mean_terminated_length": 783.8972778320312, "completions/min_length": 0.0, "completions/min_terminated_length": 320.0, "epoch": 0.15893333333333334, "grad_norm": 0.18674218654632568, "learning_rate": 1.4444444444444445e-06, "loss": 0.0311, "num_tokens": 39504014.0, "reward": 1.03515625, "reward_std": 0.30080240964889526, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.96875, "step": 149 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 9.09375, "calib/ece": 0.35622132253711214, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9959514170040485, "calib/gap": -0.0036106765277578168, "calib/mean_conf": 0.9904993252361675, "calib/mu_c": 0.9891983122362871, "calib/mu_w": 0.9928089887640449, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35352226720647784, "calib/std_conf": 0.042155943753956086, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9152635486265775, "calib/step_q_c_n": 1347.0, "calib/step_q_gap": 0.003714109278973088, "calib/step_q_w": 0.9115494393476045, "calib/step_q_w_n": 981.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2365.0, "completions/max_terminated_length": 2365.0, "completions/mean_length": 683.05078125, "completions/mean_terminated_length": 705.0846557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 318.0, "epoch": 0.16, "grad_norm": 0.1734960377216339, "learning_rate": 1.4166666666666667e-06, "loss": -0.0502, "num_tokens": 39783835.0, "reward": 1.099609375, "reward_std": 0.20597529411315918, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.96484375, "step": 150 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 8.23046875, "calib/ece": 0.5041463414634146, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0007460317460317167, "calib/mean_conf": 0.9919512195121951, "calib/mu_c": 0.9923333333333334, "calib/mu_w": 0.9915873015873017, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5041463414634146, "calib/std_conf": 0.003962945563237058, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9136016096579478, "calib/step_q_c_n": 994.0, "calib/step_q_gap": 0.0042485099274894855, "calib/step_q_w": 0.9093530997304583, "calib/step_q_w_n": 1113.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 829.67578125, "completions/mean_terminated_length": 839.5138549804688, "completions/min_length": 0.0, "completions/min_terminated_length": 424.0, "epoch": 0.16106666666666666, "grad_norm": 0.18515925109386444, "learning_rate": 1.3888888888888892e-06, "loss": 0.0253, "num_tokens": 40103256.0, "reward": 0.947265625, "reward_std": 0.31336262822151184, "rewards/accuracy_reward_step": 0.46875, "rewards/format_reward_step": 0.95703125, "step": 151 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 9.5625, "calib/ece": 0.46032365145228227, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -8.0475663716717e-05, "calib/mean_conf": 0.9914439834024897, "calib/mu_c": 0.99140625, "calib/mu_w": 0.9914867256637168, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.46032365145228227, "calib/std_conf": 0.0035054751342345866, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9150719424460431, "calib/step_q_c_n": 1112.0, "calib/step_q_gap": 0.00028825981131241907, "calib/step_q_w": 0.9147836826347306, "calib/step_q_w_n": 1336.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2681.0, "completions/max_terminated_length": 2681.0, "completions/mean_length": 802.953125, "completions/mean_terminated_length": 825.5260620117188, "completions/min_length": 0.0, "completions/min_terminated_length": 436.0, "epoch": 0.16213333333333332, "grad_norm": 0.18708793818950653, "learning_rate": 1.3611111111111112e-06, "loss": -0.0153, "num_tokens": 40414204.0, "reward": 0.970703125, "reward_std": 0.2786449193954468, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.94140625, "step": 152 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.43359375, "calib/ece": 0.40223577235772345, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00036394673950157497, "calib/mean_conf": 0.9916666666666666, "calib/mu_c": 0.9915172413793102, "calib/mu_w": 0.9918811881188118, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40223577235772345, "calib/std_conf": 0.003726779962499653, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9149527085124677, "calib/step_q_c_n": 1163.0, "calib/step_q_gap": -0.00010753245138783729, "calib/step_q_w": 0.9150602409638555, "calib/step_q_w_n": 996.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 781.5625, "completions/mean_terminated_length": 803.5341186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 372.0, "epoch": 0.1632, "grad_norm": 0.16443714499473572, "learning_rate": 1.3333333333333334e-06, "loss": -0.0299, "num_tokens": 40721604.0, "reward": 1.05078125, "reward_std": 0.2820996046066284, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.9609375, "step": 153 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.9375, "calib/ece": 0.48970281124498005, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0005599354838710102, "calib/mean_conf": 0.9917108433734941, "calib/mu_c": 0.9914319999999999, "calib/mu_w": 0.9919919354838709, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.48970281124498005, "calib/std_conf": 0.0037476529323794885, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.920658424908425, "calib/step_q_c_n": 1092.0, "calib/step_q_gap": -0.000581541646759054, "calib/step_q_w": 0.921239966555184, "calib/step_q_w_n": 1196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2099.0, "completions/max_terminated_length": 2099.0, "completions/mean_length": 731.42578125, "completions/mean_terminated_length": 751.9879150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 382.0, "epoch": 0.16426666666666667, "grad_norm": 0.16835734248161316, "learning_rate": 1.3055555555555556e-06, "loss": -0.0415, "num_tokens": 41013289.0, "reward": 0.974609375, "reward_std": 0.212571382522583, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.97265625, "step": 154 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 9.4140625, "calib/ece": 0.4033960000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.000409286044515178, "calib/mean_conf": 0.991396, "calib/mu_c": 0.9915646258503401, "calib/mu_w": 0.9911553398058249, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4033960000000001, "calib/std_conf": 0.003460517880317919, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9184092606422704, "calib/step_q_c_n": 1339.0, "calib/step_q_gap": -0.000744800982379501, "calib/step_q_w": 0.9191540616246499, "calib/step_q_w_n": 1071.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2281.0, "completions/max_terminated_length": 2281.0, "completions/mean_length": 785.921875, "completions/mean_terminated_length": 798.3968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 436.0, "epoch": 0.16533333333333333, "grad_norm": 0.18231578171253204, "learning_rate": 1.2777777777777779e-06, "loss": -0.0423, "num_tokens": 41321701.0, "reward": 1.0625, "reward_std": 0.26381197571754456, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.9765625, "step": 155 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 11.0078125, "calib/ece": 0.4370333333333335, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00015852715901898407, "calib/mean_conf": 0.9912000000000002, "calib/mu_c": 0.9912706766917293, "calib/mu_w": 0.9911121495327103, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4370333333333335, "calib/std_conf": 0.0032380549717384386, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9186949286846275, "calib/step_q_c_n": 1262.0, "calib/step_q_gap": -0.00517396591691488, "calib/step_q_w": 0.9238688946015424, "calib/step_q_w_n": 1556.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 781.55078125, "completions/mean_terminated_length": 816.6408081054688, "completions/min_length": 0.0, "completions/min_terminated_length": 383.0, "epoch": 0.1664, "grad_norm": 0.1721334308385849, "learning_rate": 1.25e-06, "loss": -0.0415, "num_tokens": 41626538.0, "reward": 0.98828125, "reward_std": 0.24574384093284607, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.9375, "step": 156 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 11.01953125, "calib/ece": 0.34683673469387755, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00010548523206788119, "calib/mean_conf": 0.991734693877551, "calib/mu_c": 0.9917721518987341, "calib/mu_w": 0.9916666666666663, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34683673469387755, "calib/std_conf": 0.004374207847604431, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9220989974937344, "calib/step_q_c_n": 1596.0, "calib/step_q_gap": -0.006978553526673759, "calib/step_q_w": 0.9290775510204081, "calib/step_q_w_n": 1225.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 792.1328125, "completions/mean_terminated_length": 824.3333129882812, "completions/min_length": 0.0, "completions/min_terminated_length": 462.0, "epoch": 0.16746666666666668, "grad_norm": 0.1614348441362381, "learning_rate": 1.2222222222222223e-06, "loss": -0.0904, "num_tokens": 41933052.0, "reward": 1.095703125, "reward_std": 0.23183158040046692, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.95703125, "step": 157 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 10.63671875, "calib/ece": 0.34127572016460905, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00046909903201775016, "calib/mean_conf": 0.9914814814814815, "calib/mu_c": 0.9916455696202529, "calib/mu_w": 0.9911764705882351, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34127572016460905, "calib/std_conf": 0.0035524677950464624, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9242605863192183, "calib/step_q_c_n": 1535.0, "calib/step_q_gap": -0.010730996172364171, "calib/step_q_w": 0.9349915824915824, "calib/step_q_w_n": 1188.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 771.078125, "completions/mean_terminated_length": 802.4227294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 465.0, "epoch": 0.16853333333333334, "grad_norm": 0.17561709880828857, "learning_rate": 1.1944444444444446e-06, "loss": -0.0555, "num_tokens": 42235688.0, "reward": 1.091796875, "reward_std": 0.28867921233177185, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.94921875, "step": 158 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 13.3984375, "calib/ece": 0.3978744588744588, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0007016617487191068, "calib/mean_conf": 0.9909480519480519, "calib/mu_c": 0.9912335766423358, "calib/mu_w": 0.9905319148936167, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3978744588744588, "calib/std_conf": 0.0029227993337472484, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9257992481203008, "calib/step_q_c_n": 1330.0, "calib/step_q_gap": -0.025595989974937394, "calib/step_q_w": 0.9513952380952382, "calib/step_q_w_n": 2100.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2909.0, "completions/max_terminated_length": 2909.0, "completions/mean_length": 742.56640625, "completions/mean_terminated_length": 815.866943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 503.0, "epoch": 0.1696, "grad_norm": 0.19220517575740814, "learning_rate": 1.1666666666666668e-06, "loss": -0.1328, "num_tokens": 42530569.0, "reward": 0.986328125, "reward_std": 0.4323299527168274, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.90234375, "step": 159 }, { "calib/answer_extract_rate": 0.90234375, "calib/avg_num_step_conf": 12.61328125, "calib/ece": 0.3415541125541126, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006518518518516814, "calib/mean_conf": 0.990904761904762, "calib/mu_c": 0.9911333333333332, "calib/mu_w": 0.9904814814814815, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3415541125541126, "calib/std_conf": 0.002861828769222511, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9232820855614974, "calib/step_q_c_n": 1496.0, "calib/step_q_gap": -0.017290909245196162, "calib/step_q_w": 0.9405729948066935, "calib/step_q_w_n": 1733.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2659.0, "completions/max_terminated_length": 2659.0, "completions/mean_length": 787.7890625, "completions/mean_terminated_length": 847.3698120117188, "completions/min_length": 0.0, "completions/min_terminated_length": 477.0, "epoch": 0.17066666666666666, "grad_norm": 0.1917860209941864, "learning_rate": 1.138888888888889e-06, "loss": -0.062, "num_tokens": 42837083.0, "reward": 1.037109375, "reward_std": 0.3205576539039612, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.90234375, "step": 160 }, { "calib/answer_extract_rate": 0.8671875, "calib/avg_num_step_conf": 14.1953125, "calib/ece": 0.2570045045045045, "calib/final_conf_rate": 0.8671875, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00030206925236542226, "calib/mean_conf": 0.9912387387387388, "calib/mu_c": 0.9913190184049079, "calib/mu_w": 0.9910169491525425, "calib/nonempty_final_conf_rate": 0.8671875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2570045045045045, "calib/std_conf": 0.0032772398618231705, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9238732394366197, "calib/step_q_c_n": 1633.0, "calib/step_q_gap": -0.0283731373749746, "calib/step_q_w": 0.9522463768115943, "calib/step_q_w_n": 2001.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 737.0234375, "completions/mean_terminated_length": 831.1806030273438, "completions/min_length": 0.0, "completions/min_terminated_length": 461.0, "epoch": 0.17173333333333332, "grad_norm": 0.18176619708538055, "learning_rate": 1.111111111111111e-06, "loss": -0.1118, "num_tokens": 43129681.0, "reward": 1.068359375, "reward_std": 0.368930459022522, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.86328125, "step": 161 }, { "calib/answer_extract_rate": 0.89453125, "calib/avg_num_step_conf": 13.8125, "calib/ece": 0.3053580786026201, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0009972576079262385, "calib/mean_conf": 0.9909475982532752, "calib/mu_c": 0.9912611464968152, "calib/mu_w": 0.9902638888888889, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3053580786026201, "calib/std_conf": 0.002908631319540183, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9286242955541641, "calib/step_q_c_n": 1597.0, "calib/step_q_gap": -0.021603141268940584, "calib/step_q_w": 0.9502274368231047, "calib/step_q_w_n": 1939.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 747.8828125, "completions/mean_terminated_length": 825.25, "completions/min_length": 0.0, "completions/min_terminated_length": 465.0, "epoch": 0.1728, "grad_norm": 0.1692075878381729, "learning_rate": 1.0833333333333335e-06, "loss": -0.1072, "num_tokens": 43425283.0, "reward": 1.060546875, "reward_std": 0.302878737449646, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.89453125, "step": 162 }, { "calib/answer_extract_rate": 0.89453125, "calib/avg_num_step_conf": 13.2734375, "calib/ece": 0.412695652173913, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0006635144562439077, "calib/mean_conf": 0.9909565217391304, "calib/mu_c": 0.9906766917293233, "calib/mu_w": 0.9913402061855672, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.412695652173913, "calib/std_conf": 0.0043686458042019065, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9277293233082707, "calib/step_q_c_n": 1330.0, "calib/step_q_gap": -0.019925415569872373, "calib/step_q_w": 0.9476547388781431, "calib/step_q_w_n": 2068.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 806.09765625, "completions/mean_terminated_length": 878.1318969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 427.0, "epoch": 0.17386666666666667, "grad_norm": 0.16379491984844208, "learning_rate": 1.0555555555555557e-06, "loss": -0.1066, "num_tokens": 43736476.0, "reward": 0.966796875, "reward_std": 0.30104950070381165, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.89453125, "step": 163 }, { "calib/answer_extract_rate": 0.91015625, "calib/avg_num_step_conf": 11.84765625, "calib/ece": 0.3810987124463521, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.7885776195587546e-05, "calib/mean_conf": 0.990540772532189, "calib/mu_c": 0.9905633802816899, "calib/mu_w": 0.9905054945054943, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3810987124463521, "calib/std_conf": 0.0024122920742030173, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9231641791044776, "calib/step_q_c_n": 1340.0, "calib/step_q_gap": -0.014627905951635878, "calib/step_q_w": 0.9377920850561134, "calib/step_q_w_n": 1693.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 821.84765625, "completions/mean_terminated_length": 891.4957885742188, "completions/min_length": 0.0, "completions/min_terminated_length": 505.0, "epoch": 0.17493333333333333, "grad_norm": 0.16550104320049286, "learning_rate": 1.0277777777777777e-06, "loss": -0.091, "num_tokens": 44053005.0, "reward": 1.009765625, "reward_std": 0.2756240665912628, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.91015625, "step": 164 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 10.8671875, "calib/ece": 0.4410247933884298, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00012788852866119083, "calib/mean_conf": 0.9906115702479339, "calib/mu_c": 0.9906691729323307, "calib/mu_w": 0.9905412844036695, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4410247933884298, "calib/std_conf": 0.0023806100365015024, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9178924103419517, "calib/step_q_c_n": 1199.0, "calib/step_q_gap": -0.019873856240486876, "calib/step_q_w": 0.9377662665824386, "calib/step_q_w_n": 1583.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2114.0, "completions/max_terminated_length": 2114.0, "completions/mean_length": 832.63671875, "completions/mean_terminated_length": 873.5859985351562, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.176, "grad_norm": 0.1745368391275406, "learning_rate": 1.0000000000000002e-06, "loss": -0.0757, "num_tokens": 44371736.0, "reward": 0.990234375, "reward_std": 0.3081328272819519, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.94140625, "step": 165 }, { "calib/answer_extract_rate": 0.875, "calib/avg_num_step_conf": 10.984375, "calib/ece": 0.28958035714285724, "calib/final_conf_rate": 0.875, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00010019963874885462, "calib/mean_conf": 0.9904732142857143, "calib/mu_c": 0.9905031847133755, "calib/mu_w": 0.9904029850746267, "calib/nonempty_final_conf_rate": 0.875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28958035714285724, "calib/std_conf": 0.0020850651815289846, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9221847307430131, "calib/step_q_c_n": 1467.0, "calib/step_q_gap": -0.00874612427557453, "calib/step_q_w": 0.9309308550185876, "calib/step_q_w_n": 1345.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2750.0, "completions/max_terminated_length": 2750.0, "completions/mean_length": 808.18359375, "completions/mean_terminated_length": 895.6493530273438, "completions/min_length": 0.0, "completions/min_terminated_length": 452.0, "epoch": 0.17706666666666668, "grad_norm": 0.1618948131799698, "learning_rate": 9.722222222222224e-07, "loss": -0.1082, "num_tokens": 44684815.0, "reward": 1.05078125, "reward_std": 0.3207697868347168, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.875, "step": 166 }, { "calib/answer_extract_rate": 0.93359375, "calib/avg_num_step_conf": 10.26953125, "calib/ece": 0.32927615062761517, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00017768401312734383, "calib/mean_conf": 0.9903640167364017, "calib/mu_c": 0.9903037974683542, "calib/mu_w": 0.9904814814814815, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32927615062761517, "calib/std_conf": 0.0018424680372639151, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9210435365036839, "calib/step_q_c_n": 1493.0, "calib/step_q_gap": -0.014997836735752834, "calib/step_q_w": 0.9360413732394367, "calib/step_q_w_n": 1136.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 782.7734375, "completions/mean_terminated_length": 824.6502075195312, "completions/min_length": 0.0, "completions/min_terminated_length": 500.0, "epoch": 0.17813333333333334, "grad_norm": 0.1658693253993988, "learning_rate": 9.444444444444445e-07, "loss": -0.0328, "num_tokens": 44990813.0, "reward": 1.083984375, "reward_std": 0.25721055269241333, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.93359375, "step": 167 }, { "calib/answer_extract_rate": 0.91796875, "calib/avg_num_step_conf": 10.578125, "calib/ece": 0.30531489361702135, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0001740809132110277, "calib/mean_conf": 0.9904212765957447, "calib/mu_c": 0.9903664596273293, "calib/mu_w": 0.9905405405405403, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30531489361702135, "calib/std_conf": 0.001999248387515709, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9184727272727273, "calib/step_q_c_n": 1485.0, "calib/step_q_gap": -0.005324492678213, "calib/step_q_w": 0.9237972199509403, "calib/step_q_w_n": 1223.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2418.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 827.09375, "completions/mean_terminated_length": 882.2333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 467.0, "epoch": 0.1792, "grad_norm": 0.18554162979125977, "learning_rate": 9.166666666666666e-07, "loss": -0.1005, "num_tokens": 45307221.0, "reward": 1.087890625, "reward_std": 0.41108518838882446, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.91796875, "step": 168 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 10.1875, "calib/ece": 0.3830495867768595, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00030017901897638755, "calib/mean_conf": 0.9904876033057851, "calib/mu_c": 0.9906054421768707, "calib/mu_w": 0.9903052631578944, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3830495867768595, "calib/std_conf": 0.002136327668862951, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9197799999999999, "calib/step_q_c_n": 1350.0, "calib/step_q_gap": -0.0173026709062003, "calib/step_q_w": 0.9370826709062002, "calib/step_q_w_n": 1258.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2902.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 835.328125, "completions/mean_terminated_length": 876.4097900390625, "completions/min_length": 0.0, "completions/min_terminated_length": 432.0, "epoch": 0.18026666666666666, "grad_norm": 0.1450500339269638, "learning_rate": 8.88888888888889e-07, "loss": -0.0578, "num_tokens": 45625249.0, "reward": 1.046875, "reward_std": 0.22004562616348267, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.9453125, "step": 169 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 9.140625, "calib/ece": 0.3747272727272729, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0001228260085154309, "calib/mean_conf": 0.9904297520661158, "calib/mu_c": 0.9903825503355703, "calib/mu_w": 0.9905053763440858, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3747272727272729, "calib/std_conf": 0.0019722305460971212, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9167304542069992, "calib/step_q_c_n": 1343.0, "calib/step_q_gap": -0.007478171670633915, "calib/step_q_w": 0.9242086258776331, "calib/step_q_w_n": 997.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2763.0, "completions/max_terminated_length": 2763.0, "completions/mean_length": 811.234375, "completions/mean_terminated_length": 844.2113647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 377.0, "epoch": 0.18133333333333335, "grad_norm": 0.180429607629776, "learning_rate": 8.611111111111112e-07, "loss": -0.0272, "num_tokens": 45937077.0, "reward": 1.0546875, "reward_std": 0.3228003978729248, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.9453125, "step": 170 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 9.11328125, "calib/ece": 0.5066803278688524, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0002273338714018358, "calib/mean_conf": 0.9902868852459016, "calib/mu_c": 0.9901694915254236, "calib/mu_w": 0.9903968253968254, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5066803278688524, "calib/std_conf": 0.0016692960536406824, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9185578747628085, "calib/step_q_c_n": 1054.0, "calib/step_q_gap": 0.0031591257401344386, "calib/step_q_w": 0.915398749022674, "calib/step_q_w_n": 1279.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2559.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 805.4296875, "completions/mean_terminated_length": 838.1707153320312, "completions/min_length": 0.0, "completions/min_terminated_length": 463.0, "epoch": 0.1824, "grad_norm": 0.17943838238716125, "learning_rate": 8.333333333333333e-07, "loss": -0.0565, "num_tokens": 46250163.0, "reward": 0.9375, "reward_std": 0.32925909757614136, "rewards/accuracy_reward_step": 0.4609375, "rewards/format_reward_step": 0.953125, "step": 171 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.5859375, "calib/ece": 0.2671767068273093, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0002864734299514726, "calib/mean_conf": 0.9900682730923696, "calib/mu_c": 0.9899888888888888, "calib/mu_w": 0.9902753623188403, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2671767068273093, "calib/std_conf": 0.003195025851300865, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9176888454011741, "calib/step_q_c_n": 1533.0, "calib/step_q_gap": 0.0047114017921515705, "calib/step_q_w": 0.9129774436090226, "calib/step_q_w_n": 665.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2987.0, "completions/max_terminated_length": 2987.0, "completions/mean_length": 776.5703125, "completions/mean_terminated_length": 795.2080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.18346666666666667, "grad_norm": 0.1873815506696701, "learning_rate": 8.055555555555557e-07, "loss": -0.0292, "num_tokens": 46552317.0, "reward": 1.189453125, "reward_std": 0.30480220913887024, "rewards/accuracy_reward_step": 0.703125, "rewards/format_reward_step": 0.97265625, "step": 172 }, { "calib/answer_extract_rate": 0.953125, "calib/avg_num_step_conf": 9.6171875, "calib/ece": 0.33864344262295076, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00027014428412885927, "calib/mean_conf": 0.9902827868852458, "calib/mu_c": 0.9901886792452826, "calib/mu_w": 0.9904588235294115, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33864344262295076, "calib/std_conf": 0.0016465160746528724, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9175739644970413, "calib/step_q_c_n": 1521.0, "calib/step_q_gap": -0.0043410195624697945, "calib/step_q_w": 0.9219149840595111, "calib/step_q_w_n": 941.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 846.43359375, "completions/mean_terminated_length": 870.2288818359375, "completions/min_length": 0.0, "completions/min_terminated_length": 371.0, "epoch": 0.18453333333333333, "grad_norm": 0.16905918717384338, "learning_rate": 7.777777777777779e-07, "loss": 0.001, "num_tokens": 46872164.0, "reward": 1.09765625, "reward_std": 0.251212477684021, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.953125, "step": 173 }, { "calib/answer_extract_rate": 0.921875, "calib/avg_num_step_conf": 9.7734375, "calib/ece": 0.4460168776371308, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -4.026701119708154e-05, "calib/mean_conf": 0.9903206751054853, "calib/mu_c": 0.9903023255813954, "calib/mu_w": 0.9903425925925925, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4460168776371308, "calib/std_conf": 0.0019483073817899558, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9168514588859417, "calib/step_q_c_n": 1131.0, "calib/step_q_gap": -0.004921699392687051, "calib/step_q_w": 0.9217731582786287, "calib/step_q_w_n": 1371.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3056.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 894.421875, "completions/mean_terminated_length": 946.1652221679688, "completions/min_length": 0.0, "completions/min_terminated_length": 491.0, "epoch": 0.1856, "grad_norm": 0.15766571462154388, "learning_rate": 7.5e-07, "loss": -0.0503, "num_tokens": 47205368.0, "reward": 0.96484375, "reward_std": 0.3347104787826538, "rewards/accuracy_reward_step": 0.50390625, "rewards/format_reward_step": 0.921875, "step": 174 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 8.74609375, "calib/ece": 0.5440375, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.9958333333333333, "calib/gap": -0.0007787927763334146, "calib/mean_conf": 0.9898708333333334, "calib/mu_c": 0.9894392523364485, "calib/mu_w": 0.9902180451127819, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5440375, "calib/std_conf": 0.00601283205366286, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9203290870488323, "calib/step_q_c_n": 942.0, "calib/step_q_gap": 0.0061270824227721565, "calib/step_q_w": 0.9142020046260602, "calib/step_q_w_n": 1297.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 832.51953125, "completions/mean_terminated_length": 862.854248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 507.0, "epoch": 0.18666666666666668, "grad_norm": 0.1711643487215042, "learning_rate": 7.222222222222222e-07, "loss": -0.0372, "num_tokens": 47524317.0, "reward": 0.890625, "reward_std": 0.2728787660598755, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.9375, "step": 175 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 9.43359375, "calib/ece": 0.41413168724279836, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00020436893203912287, "calib/mean_conf": 0.9902633744855968, "calib/mu_c": 0.99035, "calib/mu_w": 0.9901456310679608, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41413168724279836, "calib/std_conf": 0.002713435579808148, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9192688870836719, "calib/step_q_c_n": 1231.0, "calib/step_q_gap": 0.018897265462050394, "calib/step_q_w": 0.9003716216216215, "calib/step_q_w_n": 1184.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2535.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 817.59765625, "completions/mean_terminated_length": 847.388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 389.0, "epoch": 0.18773333333333334, "grad_norm": 0.17403370141983032, "learning_rate": 6.944444444444446e-07, "loss": -0.0415, "num_tokens": 47837686.0, "reward": 1.021484375, "reward_std": 0.3301824629306793, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.94921875, "step": 176 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 8.359375, "calib/ece": 0.36323770491803276, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00010701716584071708, "calib/mean_conf": 0.9902868852459016, "calib/mu_c": 0.9903267973856208, "calib/mu_w": 0.9902197802197801, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36323770491803276, "calib/std_conf": 0.001669296053640682, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9181977186311787, "calib/step_q_c_n": 1315.0, "calib/step_q_gap": 0.0026583246917848413, "calib/step_q_w": 0.9155393939393939, "calib/step_q_w_n": 825.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2395.0, "completions/max_terminated_length": 2395.0, "completions/mean_length": 813.44140625, "completions/mean_terminated_length": 836.3092041015625, "completions/min_length": 0.0, "completions/min_terminated_length": 500.0, "epoch": 0.1888, "grad_norm": 0.1733151078224182, "learning_rate": 6.666666666666667e-07, "loss": -0.0216, "num_tokens": 48149759.0, "reward": 1.072265625, "reward_std": 0.222038134932518, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.94921875, "step": 177 }, { "calib/answer_extract_rate": 0.94140625, "calib/avg_num_step_conf": 8.87890625, "calib/ece": 0.3460167364016735, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0008709702062640323, "calib/mean_conf": 0.99036820083682, "calib/mu_c": 0.9900584415584415, "calib/mu_w": 0.9909294117647055, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3460167364016735, "calib/std_conf": 0.0018630949263079577, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9145264750378214, "calib/step_q_c_n": 1322.0, "calib/step_q_gap": 0.0018482415993356094, "calib/step_q_w": 0.9126782334384858, "calib/step_q_w_n": 951.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 782.7109375, "completions/mean_terminated_length": 817.85302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 470.0, "epoch": 0.18986666666666666, "grad_norm": 0.18498072028160095, "learning_rate": 6.388888888888889e-07, "loss": -0.0341, "num_tokens": 48456205.0, "reward": 1.068359375, "reward_std": 0.31419384479522705, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.93359375, "step": 178 }, { "calib/answer_extract_rate": 0.9296875, "calib/avg_num_step_conf": 8.5, "calib/ece": 0.3417907949790795, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00021267281105996627, "calib/mean_conf": 0.990326359832636, "calib/mu_c": 0.9902516129032256, "calib/mu_w": 0.9904642857142856, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3417907949790795, "calib/std_conf": 0.0017554981567835014, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.9156312111801242, "calib/step_q_c_n": 1288.0, "calib/step_q_gap": 0.013353058026971176, "calib/step_q_w": 0.9022781531531531, "calib/step_q_w_n": 888.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2873.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 790.3671875, "completions/mean_terminated_length": 829.2376708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 432.0, "epoch": 0.19093333333333334, "grad_norm": 0.17732445895671844, "learning_rate": 6.111111111111112e-07, "loss": -0.0293, "num_tokens": 48764803.0, "reward": 1.068359375, "reward_std": 0.3088341951370239, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.92578125, "step": 179 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.3828125, "calib/ece": 0.3602357723577235, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0001718539524990792, "calib/mean_conf": 0.9903170731707317, "calib/mu_c": 0.9903806451612902, "calib/mu_w": 0.9902087912087911, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3602357723577235, "calib/std_conf": 0.0017311919535429633, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9111107668474052, "calib/step_q_c_n": 1291.0, "calib/step_q_gap": -0.004353560637974874, "calib/step_q_w": 0.9154643274853801, "calib/step_q_w_n": 855.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 836.5234375, "completions/mean_terminated_length": 860.0401000976562, "completions/min_length": 0.0, "completions/min_terminated_length": 477.0, "epoch": 0.192, "grad_norm": 0.16332538425922394, "learning_rate": 5.833333333333334e-07, "loss": -0.0279, "num_tokens": 49082809.0, "reward": 1.08984375, "reward_std": 0.2516862750053406, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.9609375, "step": 180 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.2890625, "calib/ece": 0.43997590361445804, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00015888946819575533, "calib/mean_conf": 0.9901767068273094, "calib/mu_c": 0.9902481751824816, "calib/mu_w": 0.9900892857142859, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43997590361445804, "calib/std_conf": 0.0012646330635942267, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9141155595996361, "calib/step_q_c_n": 1099.0, "calib/step_q_gap": -0.0005667473407353629, "calib/step_q_w": 0.9146823069403714, "calib/step_q_w_n": 1023.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2656.0, "completions/max_terminated_length": 2656.0, "completions/mean_length": 782.58203125, "completions/mean_terminated_length": 798.1713256835938, "completions/min_length": 0.0, "completions/min_terminated_length": 414.0, "epoch": 0.19306666666666666, "grad_norm": 0.19966793060302734, "learning_rate": 5.555555555555555e-07, "loss": -0.0271, "num_tokens": 49389414.0, "reward": 1.01953125, "reward_std": 0.29831817746162415, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.96875, "step": 181 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.88671875, "calib/ece": 0.39507936507936514, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.9215686274518546e-05, "calib/mean_conf": 0.9903174603174604, "calib/mu_c": 0.9903333333333331, "calib/mu_w": 0.9902941176470585, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39507936507936514, "calib/std_conf": 0.0017532319074900432, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9131224322103533, "calib/step_q_c_n": 1217.0, "calib/step_q_gap": 0.002523928469704839, "calib/step_q_w": 0.9105985037406484, "calib/step_q_w_n": 802.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 802.16015625, "completions/mean_terminated_length": 808.4763793945312, "completions/min_length": 0.0, "completions/min_terminated_length": 473.0, "epoch": 0.19413333333333332, "grad_norm": 0.17675307393074036, "learning_rate": 5.277777777777779e-07, "loss": 0.0234, "num_tokens": 49700927.0, "reward": 1.076171875, "reward_std": 0.2512344717979431, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.98046875, "step": 182 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 7.47265625, "calib/ece": 0.42914634146341457, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00021739130434783593, "calib/mean_conf": 0.9901219512195122, "calib/mu_c": 0.9902173913043477, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42914634146341457, "calib/std_conf": 0.0010975609756097569, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9101256038647343, "calib/step_q_c_n": 1035.0, "calib/step_q_gap": 0.002125603864734371, "calib/step_q_w": 0.9079999999999999, "calib/step_q_w_n": 878.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2599.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 835.4375, "completions/mean_terminated_length": 858.9236450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 414.0, "epoch": 0.1952, "grad_norm": 0.20332731306552887, "learning_rate": 5.000000000000001e-07, "loss": -0.0296, "num_tokens": 50021479.0, "reward": 1.01953125, "reward_std": 0.33503273129463196, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.9609375, "step": 183 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.26953125, "calib/ece": 0.3126612903225807, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00011904761904768524, "calib/mean_conf": 0.9900806451612904, "calib/mu_c": 0.9901190476190477, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3126612903225807, "calib/std_conf": 0.0008943981053555988, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9111671924290222, "calib/step_q_c_n": 1268.0, "calib/step_q_gap": 0.009423965102756116, "calib/step_q_w": 0.9017432273262661, "calib/step_q_w_n": 849.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2445.0, "completions/max_terminated_length": 2445.0, "completions/mean_length": 798.4609375, "completions/mean_terminated_length": 817.6240234375, "completions/min_length": 0.0, "completions/min_terminated_length": 476.0, "epoch": 0.19626666666666667, "grad_norm": 0.18218982219696045, "learning_rate": 4.7222222222222226e-07, "loss": -0.0186, "num_tokens": 50331165.0, "reward": 1.140625, "reward_std": 0.23907458782196045, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.96875, "step": 184 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 7.78125, "calib/ece": 0.42362500000000003, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0003450226244343657, "calib/mean_conf": 0.9902916666666667, "calib/mu_c": 0.9904411764705882, "calib/mu_w": 0.9900961538461538, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42362500000000003, "calib/std_conf": 0.0016827350421923905, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9101146131805159, "calib/step_q_c_n": 1047.0, "calib/step_q_gap": 0.0370458301117329, "calib/step_q_w": 0.873068783068783, "calib/step_q_w_n": 945.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2840.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 813.578125, "completions/mean_terminated_length": 839.8225708007812, "completions/min_length": 0.0, "completions/min_terminated_length": 399.0, "epoch": 0.19733333333333333, "grad_norm": 0.16892407834529877, "learning_rate": 4.444444444444445e-07, "loss": -0.0162, "num_tokens": 50646361.0, "reward": 1.0, "reward_std": 0.2559589147567749, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.9375, "step": 185 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 8.07421875, "calib/ece": 0.4303662551440329, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -8.411214953252255e-05, "calib/mean_conf": 0.990037037037037, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9900841121495326, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4303662551440329, "calib/std_conf": 0.0005761610809668172, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9128520179372198, "calib/step_q_c_n": 1115.0, "calib/step_q_gap": 0.001009580962429979, "calib/step_q_w": 0.9118424369747898, "calib/step_q_w_n": 952.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2818.0, "completions/max_terminated_length": 2818.0, "completions/mean_length": 812.67578125, "completions/mean_terminated_length": 845.7113647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 400.0, "epoch": 0.1984, "grad_norm": 0.1680852174758911, "learning_rate": 4.1666666666666667e-07, "loss": -0.0338, "num_tokens": 50959446.0, "reward": 1.005859375, "reward_std": 0.3241080045700073, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.94921875, "step": 186 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 7.8125, "calib/ece": 0.40235510204081626, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00011571782178176893, "calib/mean_conf": 0.9901102040816326, "calib/mu_c": 0.9900625000000001, "calib/mu_w": 0.9901782178217818, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40235510204081626, "calib/std_conf": 0.0009939089799330242, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9107363717605005, "calib/step_q_c_n": 1119.0, "calib/step_q_gap": 0.008383363814984057, "calib/step_q_w": 0.9023530079455164, "calib/step_q_w_n": 881.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 843.18359375, "completions/mean_terminated_length": 859.9801025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 492.0, "epoch": 0.19946666666666665, "grad_norm": 0.18715514242649078, "learning_rate": 3.8888888888888895e-07, "loss": 0.0284, "num_tokens": 51276845.0, "reward": 1.041015625, "reward_std": 0.3368094861507416, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.95703125, "step": 187 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.51171875, "calib/ece": 0.37746640316205526, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.719552337055898e-05, "calib/mean_conf": 0.9901146245059288, "calib/mu_c": 0.9901290322580644, "calib/mu_w": 0.9900918367346938, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37746640316205526, "calib/std_conf": 0.001047632167986198, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.910450997398092, "calib/step_q_c_n": 1153.0, "calib/step_q_gap": -0.00034640519931072866, "calib/step_q_w": 0.9107974025974027, "calib/step_q_w_n": 770.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 873.3203125, "completions/mean_terminated_length": 876.7451782226562, "completions/min_length": 0.0, "completions/min_terminated_length": 385.0, "epoch": 0.20053333333333334, "grad_norm": 0.15324848890304565, "learning_rate": 3.611111111111111e-07, "loss": -0.0018, "num_tokens": 51604487.0, "reward": 1.099609375, "reward_std": 0.23567822575569153, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.98828125, "step": 188 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 7.66015625, "calib/ece": 0.35780991735537204, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00011235955056165814, "calib/mean_conf": 0.9900413223140497, "calib/mu_c": 0.99, "calib/mu_w": 0.9901123595505616, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35780991735537204, "calib/std_conf": 0.0006414948221595057, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.9097991631799164, "calib/step_q_c_n": 1195.0, "calib/step_q_gap": 0.0024623485585064664, "calib/step_q_w": 0.9073368146214099, "calib/step_q_w_n": 766.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2874.0, "completions/max_terminated_length": 2874.0, "completions/mean_length": 791.296875, "completions/mean_terminated_length": 823.46337890625, "completions/min_length": 0.0, "completions/min_terminated_length": 438.0, "epoch": 0.2016, "grad_norm": 0.19075053930282593, "learning_rate": 3.3333333333333335e-07, "loss": -0.0486, "num_tokens": 51914827.0, "reward": 1.0703125, "reward_std": 0.27192389965057373, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.9453125, "step": 189 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.15234375, "calib/ece": 0.33951807228915676, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0001851851851855102, "calib/mean_conf": 0.990120481927711, "calib/mu_c": 0.9901851851851852, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33951807228915676, "calib/std_conf": 0.0010910102576069185, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9096304849884528, "calib/step_q_c_n": 1299.0, "calib/step_q_gap": -0.0019177383618010913, "calib/step_q_w": 0.9115482233502539, "calib/step_q_w_n": 788.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1823.0, "completions/max_terminated_length": 1823.0, "completions/mean_length": 818.33984375, "completions/mean_terminated_length": 841.3453369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 417.0, "epoch": 0.20266666666666666, "grad_norm": 0.1583220213651657, "learning_rate": 3.055555555555556e-07, "loss": -0.0538, "num_tokens": 52229930.0, "reward": 1.119140625, "reward_std": 0.244254007935524, "rewards/accuracy_reward_step": 0.6328125, "rewards/format_reward_step": 0.97265625, "step": 190 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.2421875, "calib/ece": 0.39330645161290323, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -3.2432432432449865e-05, "calib/mean_conf": 0.9900806451612904, "calib/mu_c": 0.9900675675675674, "calib/mu_w": 0.9900999999999999, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39330645161290323, "calib/std_conf": 0.0008943981053555988, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9113084112149532, "calib/step_q_c_n": 1177.0, "calib/step_q_gap": 0.008811090743356065, "calib/step_q_w": 0.9024973204715971, "calib/step_q_w_n": 933.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2827.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 807.09765625, "completions/mean_terminated_length": 823.17529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 439.0, "epoch": 0.20373333333333332, "grad_norm": 0.15239450335502625, "learning_rate": 2.7777777777777776e-07, "loss": 0.0041, "num_tokens": 52540715.0, "reward": 1.0625, "reward_std": 0.2156136929988861, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.96875, "step": 191 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 7.78515625, "calib/ece": 0.3033694779116467, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00018511021142553208, "calib/mean_conf": 0.9901164658634539, "calib/mu_c": 0.9900584795321639, "calib/mu_w": 0.9902435897435894, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3033694779116467, "calib/std_conf": 0.0010559118139668953, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9111232676878191, "calib/step_q_c_n": 1371.0, "calib/step_q_gap": 0.012917479906468743, "calib/step_q_w": 0.8982057877813504, "calib/step_q_w_n": 622.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2299.0, "completions/max_terminated_length": 2299.0, "completions/mean_length": 818.88671875, "completions/mean_terminated_length": 831.8849487304688, "completions/min_length": 0.0, "completions/min_terminated_length": 386.0, "epoch": 0.2048, "grad_norm": 0.1535855233669281, "learning_rate": 2.5000000000000004e-07, "loss": -0.0124, "num_tokens": 52855326.0, "reward": 1.154296875, "reward_std": 0.2417459487915039, "rewards/accuracy_reward_step": 0.66796875, "rewards/format_reward_step": 0.97265625, "step": 192 }, { "calib/answer_extract_rate": 0.96875, "calib/avg_num_step_conf": 8.0, "calib/ece": 0.4038669354838709, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9919354838709677, "calib/gap": 0.014229166666666515, "calib/mean_conf": 0.9845120967741936, "calib/mu_c": 0.9904791666666666, "calib/mu_w": 0.9762500000000001, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4038669354838709, "calib/std_conf": 0.06710944250269774, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9107306985294119, "calib/step_q_c_n": 1088.0, "calib/step_q_gap": 0.015761948529411818, "calib/step_q_w": 0.8949687500000001, "calib/step_q_w_n": 960.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1859.0, "completions/max_terminated_length": 1859.0, "completions/mean_length": 813.01953125, "completions/mean_terminated_length": 829.2151489257812, "completions/min_length": 0.0, "completions/min_terminated_length": 488.0, "epoch": 0.20586666666666667, "grad_norm": 0.18594703078269958, "learning_rate": 2.2222222222222224e-07, "loss": -0.0141, "num_tokens": 53169171.0, "reward": 1.046875, "reward_std": 0.31658273935317993, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.96875, "step": 193 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 7.84375, "calib/ece": 0.39098380566801616, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00035005460005466116, "calib/mean_conf": 0.9901740890688259, "calib/mu_c": 0.9900337837837836, "calib/mu_w": 0.9903838383838383, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39098380566801616, "calib/std_conf": 0.0012395542003270886, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9095937770095073, "calib/step_q_c_n": 1157.0, "calib/step_q_gap": 0.007634905094113575, "calib/step_q_w": 0.9019588719153937, "calib/step_q_w_n": 851.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 811.765625, "completions/mean_terminated_length": 821.391357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 473.0, "epoch": 0.20693333333333333, "grad_norm": 0.1659308820962906, "learning_rate": 1.9444444444444447e-07, "loss": 0.0241, "num_tokens": 53482927.0, "reward": 1.060546875, "reward_std": 0.2599868178367615, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.96484375, "step": 194 }, { "calib/answer_extract_rate": 0.94921875, "calib/avg_num_step_conf": 8.26171875, "calib/ece": 0.38534567901234573, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00015943877550983476, "calib/mean_conf": 0.990283950617284, "calib/mu_c": 0.9903469387755099, "calib/mu_w": 0.9901875000000001, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38534567901234573, "calib/std_conf": 0.001567949838850028, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9125693617021279, "calib/step_q_c_n": 1175.0, "calib/step_q_gap": 0.007242765957447106, "calib/step_q_w": 0.9053265957446808, "calib/step_q_w_n": 940.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2934.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 810.203125, "completions/mean_terminated_length": 836.3386840820312, "completions/min_length": 0.0, "completions/min_terminated_length": 441.0, "epoch": 0.208, "grad_norm": 0.16117671132087708, "learning_rate": 1.6666666666666668e-07, "loss": 0.0094, "num_tokens": 53796323.0, "reward": 1.048828125, "reward_std": 0.2848566770553589, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.94921875, "step": 195 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.98046875, "calib/ece": 0.33410671936758896, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00015261044176695382, "calib/mean_conf": 0.9902332015810277, "calib/mu_c": 0.9901807228915661, "calib/mu_w": 0.9903333333333331, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33410671936758896, "calib/std_conf": 0.0014973508975364576, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9123646071700993, "calib/step_q_c_n": 1311.0, "calib/step_q_gap": 0.00786460717009918, "calib/step_q_w": 0.9045000000000001, "calib/step_q_w_n": 732.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1867.0, "completions/max_terminated_length": 1867.0, "completions/mean_length": 735.75, "completions/mean_terminated_length": 744.474365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 444.0, "epoch": 0.20906666666666668, "grad_norm": 0.1402597725391388, "learning_rate": 1.3888888888888888e-07, "loss": -0.0177, "num_tokens": 54087219.0, "reward": 1.142578125, "reward_std": 0.12430930137634277, "rewards/accuracy_reward_step": 0.6484375, "rewards/format_reward_step": 0.98828125, "step": 196 }, { "calib/answer_extract_rate": 0.9609375, "calib/avg_num_step_conf": 8.14453125, "calib/ece": 0.4454430894308944, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00018430170575711724, "calib/mean_conf": 0.9901585365853659, "calib/mu_c": 0.9900746268656716, "calib/mu_w": 0.9902589285714287, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4454430894308944, "calib/std_conf": 0.0012343608220068457, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9119418386491558, "calib/step_q_c_n": 1066.0, "calib/step_q_gap": 0.0061381095029339194, "calib/step_q_w": 0.9058037291462219, "calib/step_q_w_n": 1019.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 809.0, "completions/mean_terminated_length": 828.416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 504.0, "epoch": 0.21013333333333334, "grad_norm": 0.19100052118301392, "learning_rate": 1.1111111111111112e-07, "loss": -0.0037, "num_tokens": 54399379.0, "reward": 1.00390625, "reward_std": 0.3031455874443054, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.9609375, "step": 197 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 7.859375, "calib/ece": 0.34650607287449386, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.4316752428063e-06, "calib/mean_conf": 0.9902307692307691, "calib/mu_c": 0.9902327044025155, "calib/mu_w": 0.9902272727272727, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34650607287449386, "calib/std_conf": 0.0014646249106620441, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9112080924855491, "calib/step_q_c_n": 1211.0, "calib/step_q_gap": -0.004846838850281254, "calib/step_q_w": 0.9160549313358304, "calib/step_q_w_n": 801.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2812.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 771.47265625, "completions/mean_terminated_length": 799.5830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 414.0, "epoch": 0.2112, "grad_norm": 0.17476636171340942, "learning_rate": 8.333333333333334e-08, "loss": -0.0466, "num_tokens": 54702260.0, "reward": 1.103515625, "reward_std": 0.23565450310707092, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.96484375, "step": 198 }, { "calib/answer_extract_rate": 0.9375, "calib/avg_num_step_conf": 7.6171875, "calib/ece": 0.31928750000000017, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00019726393584351687, "calib/mean_conf": 0.9901208333333334, "calib/mu_c": 0.990055900621118, "calib/mu_w": 0.9902531645569616, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31928750000000017, "calib/std_conf": 0.0010752825855973355, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9103620414673047, "calib/step_q_c_n": 1254.0, "calib/step_q_gap": 0.0015258345707528376, "calib/step_q_w": 0.9088362068965519, "calib/step_q_w_n": 696.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 871.69921875, "completions/mean_terminated_length": 896.2047729492188, "completions/min_length": 0.0, "completions/min_terminated_length": 427.0, "epoch": 0.21226666666666666, "grad_norm": 0.19598470628261566, "learning_rate": 5.555555555555556e-08, "loss": -0.028, "num_tokens": 55029615.0, "reward": 1.099609375, "reward_std": 0.325309157371521, "rewards/accuracy_reward_step": 0.6328125, "rewards/format_reward_step": 0.93359375, "step": 199 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 7.5078125, "calib/ece": 0.3274096385542169, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -8.874458874452706e-05, "calib/mean_conf": 0.9900602409638555, "calib/mu_c": 0.9900303030303029, "calib/mu_w": 0.9901190476190475, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3274096385542169, "calib/std_conf": 0.0007059596719376287, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.9145186953062848, "calib/step_q_c_n": 1257.0, "calib/step_q_gap": 0.0061878682386157235, "calib/step_q_w": 0.9083308270676691, "calib/step_q_w_n": 665.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2309.0, "completions/max_terminated_length": 2309.0, "completions/mean_length": 811.74609375, "completions/mean_terminated_length": 831.22802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 459.0, "epoch": 0.21333333333333335, "grad_norm": 0.14802969992160797, "learning_rate": 2.777777777777778e-08, "loss": -0.0269, "num_tokens": 55345470.0, "reward": 1.12890625, "reward_std": 0.2350997030735016, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.96875, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.006713523315265774, "train_runtime": 6552.8806, "train_samples_per_second": 7.813, "train_steps_per_second": 0.031 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 55345470, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }