{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.8921052631578947, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.19807541241162607, "calib/step_q_w": 0.6940298507462687, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.004072976764291525, "learning_rate": 2.5000000000000004e-07, "loss": 0.0316, "num_tokens": 264685.0, "reward": 0.037109375, "reward_std": 0.07518024742603302, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.781, "calib/step_q_c_n": 20.0, "calib/step_q_gap": -0.042553719008264435, "calib/step_q_w": 0.8235537190082645, "calib/step_q_w_n": 121.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006365728098899126, "learning_rate": 5.000000000000001e-07, "loss": 0.0624, "num_tokens": 533467.0, "reward": 0.076171875, "reward_std": 0.143990620970726, "rewards/accuracy_reward_step": 0.03125, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.1111111111111111, "calib/avg_num_step_conf": 0.27734375, "calib/ece": 0.79, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/gap": -0.06666666666666654, "calib/mean_conf": 0.9445454545454546, "calib/mu_c": 0.89, "calib/mu_w": 0.9566666666666666, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.7763636363636364, "calib/std_conf": 0.03985510948505994, "calib/step_conf_rate": 0.05078125, "calib/step_q_c": 0.791875, "calib/step_q_c_n": 16.0, "calib/step_q_gap": 0.06823863636363625, "calib/step_q_w": 0.7236363636363637, "calib/step_q_w_n": 55.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 623.34375, "completions/mean_terminated_length": 684.8755493164062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.004042081534862518, "learning_rate": 7.5e-07, "loss": 0.0159, "num_tokens": 798299.0, "reward": 0.02734375, "reward_std": 0.06346043199300766, "rewards/accuracy_reward_step": 0.0078125, "rewards/format_reward_step": 0.0390625, "step": 3 }, { "calib/answer_extract_rate": 0.05859375, "calib/avg_num_step_conf": 0.26171875, "calib/ece": 0.9642857142857143, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.9642857142857143, "calib/mu_c": NaN, "calib/mu_w": 0.9642857142857143, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.9642857142857143, "calib/std_conf": 0.013997084244475315, "calib/step_conf_rate": 0.0546875, "calib/step_q_w": 0.758955223880597, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 706.9296875, "completions/mean_terminated_length": 776.7124633789062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.002299716928973794, "learning_rate": 1.0000000000000002e-06, "loss": 0.0139, "num_tokens": 1085441.0, "reward": 0.01171875, "reward_std": 0.02551448345184326, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.0234375, "step": 4 }, { "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.47619047619047616, "calib/avg_num_step_conf": 0.5078125, "calib/ece": 0.786470588235294, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.7647058823529411, "calib/gap": -0.043571428571428594, "calib/mean_conf": 0.9358823529411765, "calib/mu_c": 0.9, "calib/mu_w": 0.9435714285714286, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.7729411764705882, "calib/std_conf": 0.05402165399491225, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.7225, "calib/step_q_c_n": 12.0, "calib/step_q_gap": -0.04648305084745752, "calib/step_q_w": 0.7689830508474575, "calib/step_q_w_n": 118.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2801.0, "completions/max_terminated_length": 2801.0, "completions/mean_length": 665.02734375, "completions/mean_terminated_length": 718.3417358398438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.005368305370211601, "learning_rate": 1.25e-06, "loss": 0.0187, "num_tokens": 1362376.0, "reward": 0.041015625, "reward_std": 0.10456298291683197, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.05078125, "step": 5 }, { "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.8333333333333334, "calib/avg_num_step_conf": 0.42578125, "calib/ece": 0.7906249999999999, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.6875, "calib/gap": 0.14600000000000002, "calib/mean_conf": 0.8531249999999999, "calib/mu_c": 0.99, "calib/mu_w": 0.844, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.7906249999999999, "calib/std_conf": 0.2658881049896742, "calib/step_conf_rate": 0.0859375, "calib/step_q_c": 0.9866666666666667, "calib/step_q_c_n": 3.0, "calib/step_q_gap": 0.22364779874213825, "calib/step_q_w": 0.7630188679245284, "calib/step_q_w_n": 106.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 605.75390625, "completions/mean_terminated_length": 686.1636962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.005905688274651766, "learning_rate": 1.5e-06, "loss": -0.0005, "num_tokens": 1623401.0, "reward": 0.03125, "reward_std": 0.07227109372615814, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.0546875, "step": 6 }, { "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.6785714285714286, "calib/avg_num_step_conf": 0.453125, "calib/ece": 0.8293333333333335, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": 0.0685714285714285, "calib/mean_conf": 0.8960000000000001, "calib/mu_c": 0.96, "calib/mu_w": 0.8914285714285715, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.8293333333333335, "calib/std_conf": 0.10150862032359616, "calib/step_conf_rate": 0.07421875, "calib/step_q_c": 0.83, "calib/step_q_c_n": 4.0, "calib/step_q_gap": 0.08101785714285714, "calib/step_q_w": 0.7489821428571428, "calib/step_q_w_n": 112.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 713.95703125, "completions/mean_terminated_length": 791.22509765625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.007466666666666667, "grad_norm": 0.004854544997215271, "learning_rate": 1.75e-06, "loss": 0.0212, "num_tokens": 1913598.0, "reward": 0.0234375, "reward_std": 0.06629125773906708, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.0390625, "step": 7 }, { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6458333333333334, "calib/avg_num_step_conf": 0.37109375, "calib/ece": 0.5511333333333334, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.11330000000000007, "calib/mean_conf": 0.8844666666666666, "calib/mu_c": 0.96, "calib/mu_w": 0.8466999999999999, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.5511333333333334, "calib/std_conf": 0.2288445663666838, "calib/step_conf_rate": 0.08984375, "calib/step_q_c": 0.8510344827586208, "calib/step_q_c_n": 29.0, "calib/step_q_gap": 0.08688296760710568, "calib/step_q_w": 0.7641515151515151, "calib/step_q_w_n": 66.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 695.8515625, "completions/mean_terminated_length": 758.0339965820312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.0034913497511297464, "learning_rate": 2.0000000000000003e-06, "loss": 0.0336, "num_tokens": 2198248.0, "reward": 0.052734375, "reward_std": 0.11081699281930923, "rewards/accuracy_reward_step": 0.0234375, "rewards/format_reward_step": 0.05859375, "step": 8 }, { "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.7545454545454545, "calib/avg_num_step_conf": 0.34765625, "calib/ece": 0.6024999999999999, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8125, "calib/gap": 0.07127272727272738, "calib/mean_conf": 0.915, "calib/mu_c": 0.9640000000000001, "calib/mu_w": 0.8927272727272727, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.6024999999999999, "calib/std_conf": 0.13847382424126228, "calib/step_conf_rate": 0.08203125, "calib/step_q_c": 0.8352941176470587, "calib/step_q_c_n": 17.0, "calib/step_q_gap": 0.04543300653594762, "calib/step_q_w": 0.7898611111111111, "calib/step_q_w_n": 72.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 649.29296875, "completions/mean_terminated_length": 719.562744140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.00364897632971406, "learning_rate": 2.25e-06, "loss": 0.0231, "num_tokens": 2472003.0, "reward": 0.041015625, "reward_std": 0.09140633791685104, "rewards/accuracy_reward_step": 0.01953125, "rewards/format_reward_step": 0.04296875, "step": 9 }, { "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.4083333333333333, "calib/avg_num_step_conf": 0.421875, "calib/ece": 0.8147826086956523, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.8695652173913043, "calib/gap": -0.006000000000000338, "calib/mean_conf": 0.9452173913043481, "calib/mu_c": 0.94, "calib/mu_w": 0.9460000000000003, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.8147826086956523, "calib/std_conf": 0.033345303458806964, "calib/step_conf_rate": 0.08203125, "calib/step_q_c": 0.8081818181818181, "calib/step_q_c_n": 11.0, "calib/step_q_gap": 0.04779006560449861, "calib/step_q_w": 0.7603917525773195, "calib/step_q_w_n": 97.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3008.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 662.890625, "completions/mean_terminated_length": 722.1276245117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.0058372789062559605, "learning_rate": 2.5e-06, "loss": 0.0497, "num_tokens": 2748503.0, "reward": 0.04296875, "reward_std": 0.10015765577554703, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.0625, "step": 10 }, { "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.4722222222222222, "calib/avg_num_step_conf": 0.6328125, "calib/ece": 0.6062962962962962, "calib/final_conf_rate": 0.10546875, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.8148148148148148, "calib/gap": 0.01722222222222225, "calib/mean_conf": 0.9396296296296295, "calib/mu_c": 0.951111111111111, "calib/mu_w": 0.9338888888888888, "calib/nonempty_final_conf_rate": 0.10546875, "calib/nonempty_reasoning_rate": 0.1796875, "calib/nonempty_step_conf_rate": 0.1484375, "calib/pce": 0.6062962962962962, "calib/std_conf": 0.050733981707163735, "calib/step_conf_rate": 0.1484375, "calib/step_q_c": 0.7855208333333333, "calib/step_q_c_n": 32.0, "calib/step_q_gap": 0.017208430232558136, "calib/step_q_w": 0.7683124031007752, "calib/step_q_w_n": 129.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 675.328125, "completions/mean_terminated_length": 720.3500366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.005052024032920599, "learning_rate": 2.7500000000000004e-06, "loss": 0.0647, "num_tokens": 3025867.0, "reward": 0.078125, "reward_std": 0.1473740190267563, "rewards/accuracy_reward_step": 0.03515625, "rewards/format_reward_step": 0.0859375, "step": 11 }, { "calib/answer_extract_rate": 0.18359375, "calib/auroc": 0.3007246376811594, "calib/avg_num_step_conf": 0.6015625, "calib/ece": 0.6214285714285712, "calib/final_conf_rate": 0.13671875, "calib/format_rate": 0.109375, "calib/frac_conf_gt_0.9": 0.7714285714285715, "calib/gap": -0.06829710144927525, "calib/mean_conf": 0.9357142857142857, "calib/mu_c": 0.8908333333333333, "calib/mu_w": 0.9591304347826085, "calib/nonempty_final_conf_rate": 0.13671875, "calib/nonempty_reasoning_rate": 0.21484375, "calib/nonempty_step_conf_rate": 0.14453125, "calib/pce": 0.607142857142857, "calib/std_conf": 0.0871217773098819, "calib/step_conf_rate": 0.14453125, "calib/step_q_c": 0.7774651162790697, "calib/step_q_c_n": 43.0, "calib/step_q_gap": -0.023886235072281714, "calib/step_q_w": 0.8013513513513514, "calib/step_q_w_n": 111.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 503.56640625, "completions/mean_terminated_length": 558.06494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.007954644039273262, "learning_rate": 3e-06, "loss": 0.0132, "num_tokens": 3258956.0, "reward": 0.1015625, "reward_std": 0.16241982579231262, "rewards/accuracy_reward_step": 0.046875, "rewards/format_reward_step": 0.109375, "step": 12 }, { "calib/answer_extract_rate": 0.1953125, "calib/auroc": 0.7095238095238096, "calib/avg_num_step_conf": 1.1171875, "calib/ece": 0.6761702127659575, "calib/final_conf_rate": 0.18359375, "calib/format_rate": 0.15625, "calib/frac_conf_gt_0.9": 0.8085106382978723, "calib/gap": 0.04947619047619067, "calib/mean_conf": 0.9314893617021275, "calib/mu_c": 0.9683333333333334, "calib/mu_w": 0.9188571428571427, "calib/nonempty_final_conf_rate": 0.18359375, "calib/nonempty_reasoning_rate": 0.265625, "calib/nonempty_step_conf_rate": 0.23828125, "calib/pce": 0.6761702127659575, "calib/std_conf": 0.10431088402043429, "calib/step_conf_rate": 0.23828125, "calib/step_q_c": 0.7465442307692307, "calib/step_q_c_n": 52.0, "calib/step_q_gap": 0.020316880341880172, "calib/step_q_w": 0.7262273504273505, "calib/step_q_w_n": 234.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 592.3671875, "completions/mean_terminated_length": 629.236572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.010851632803678513, "learning_rate": 3.2500000000000002e-06, "loss": 0.0849, "num_tokens": 3515194.0, "reward": 0.12890625, "reward_std": 0.21148112416267395, "rewards/accuracy_reward_step": 0.05078125, "rewards/format_reward_step": 0.15625, "step": 13 }, { "calib/answer_extract_rate": 0.26953125, "calib/auroc": 0.5944444444444444, "calib/avg_num_step_conf": 1.2734375, "calib/ece": 0.6903459016393441, "calib/final_conf_rate": 0.23828125, "calib/format_rate": 0.21875, "calib/frac_conf_gt_0.9": 0.7704918032786885, "calib/gap": -0.0544688888888889, "calib/mean_conf": 0.8801819672131148, "calib/mu_c": 0.84, "calib/mu_w": 0.8944688888888889, "calib/nonempty_final_conf_rate": 0.23828125, "calib/nonempty_reasoning_rate": 0.33203125, "calib/nonempty_step_conf_rate": 0.29296875, "calib/pce": 0.6541163934426227, "calib/std_conf": 0.22998876345196273, "calib/step_conf_rate": 0.29296875, "calib/step_q_c": 0.8246969696969697, "calib/step_q_c_n": 66.0, "calib/step_q_gap": 0.03877773892773895, "calib/step_q_w": 0.7859192307692308, "calib/step_q_w_n": 260.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 598.984375, "completions/mean_terminated_length": 638.9166870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.014933333333333333, "grad_norm": 0.011941647157073021, "learning_rate": 3.5e-06, "loss": 0.0874, "num_tokens": 3773934.0, "reward": 0.171875, "reward_std": 0.27124205231666565, "rewards/accuracy_reward_step": 0.0625, "rewards/format_reward_step": 0.21875, "step": 14 }, { "calib/answer_extract_rate": 0.41796875, "calib/auroc": 0.525, "calib/avg_num_step_conf": 1.9609375, "calib/ece": 0.6812045454545455, "calib/final_conf_rate": 0.34375, "calib/format_rate": 0.3046875, "calib/frac_conf_gt_0.9": 0.7386363636363636, "calib/gap": 0.002794117647058725, "calib/mean_conf": 0.8873409090909092, "calib/mu_c": 0.8895, "calib/mu_w": 0.8867058823529412, "calib/nonempty_final_conf_rate": 0.34375, "calib/nonempty_reasoning_rate": 0.46875, "calib/nonempty_step_conf_rate": 0.3671875, "calib/pce": 0.6706363636363637, "calib/std_conf": 0.20150608418310825, "calib/step_conf_rate": 0.3671875, "calib/step_q_c": 0.7998780487804878, "calib/step_q_c_n": 82.0, "calib/step_q_gap": 0.03317754878048773, "calib/step_q_w": 0.7667005000000001, "calib/step_q_w_n": 420.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2684.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 521.24609375, "completions/mean_terminated_length": 542.4349365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.016, "grad_norm": 0.01295667327940464, "learning_rate": 3.7500000000000005e-06, "loss": 0.1144, "num_tokens": 4015253.0, "reward": 0.23046875, "reward_std": 0.3025348484516144, "rewards/accuracy_reward_step": 0.078125, "rewards/format_reward_step": 0.3046875, "step": 15 }, { "calib/answer_extract_rate": 0.44921875, "calib/auroc": 0.6197289156626505, "calib/avg_num_step_conf": 2.45703125, "calib/ece": 0.6569582554517132, "calib/final_conf_rate": 0.41796875, "calib/format_rate": 0.34375, "calib/frac_conf_gt_0.9": 0.719626168224299, "calib/gap": 0.07901666666666685, "calib/mean_conf": 0.8707900311526481, "calib/mu_c": 0.9320833333333334, "calib/mu_w": 0.8530666666666665, "calib/nonempty_final_conf_rate": 0.41796875, "calib/nonempty_reasoning_rate": 0.51171875, "calib/nonempty_step_conf_rate": 0.4140625, "calib/pce": 0.6517246105919001, "calib/std_conf": 0.2131570209511951, "calib/step_conf_rate": 0.4140625, "calib/step_q_c": 0.785578947368421, "calib/step_q_c_n": 95.0, "calib/step_q_gap": 0.07400553912872077, "calib/step_q_w": 0.7115734082397003, "calib/step_q_w_n": 534.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2855.0, "completions/max_terminated_length": 2855.0, "completions/mean_length": 544.0546875, "completions/mean_terminated_length": 563.8785400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.014287542551755905, "learning_rate": 4.000000000000001e-06, "loss": 0.0985, "num_tokens": 4263379.0, "reward": 0.265625, "reward_std": 0.34611278772354126, "rewards/accuracy_reward_step": 0.09375, "rewards/format_reward_step": 0.34375, "step": 16 }, { "calib/answer_extract_rate": 0.57421875, "calib/auroc": 0.5104166666666666, "calib/avg_num_step_conf": 2.859375, "calib/ece": 0.6633582089552237, "calib/final_conf_rate": 0.5234375, "calib/format_rate": 0.4453125, "calib/frac_conf_gt_0.9": 0.746268656716418, "calib/gap": 0.06454487179487167, "calib/mean_conf": 0.8872388059701491, "calib/mu_c": 0.9373333333333334, "calib/mu_w": 0.8727884615384617, "calib/nonempty_final_conf_rate": 0.5234375, "calib/nonempty_reasoning_rate": 0.65625, "calib/nonempty_step_conf_rate": 0.58984375, "calib/pce": 0.6633582089552237, "calib/std_conf": 0.20051572235614626, "calib/step_conf_rate": 0.58984375, "calib/step_q_c": 0.7984973856209151, "calib/step_q_c_n": 153.0, "calib/step_q_gap": 0.06040964814250405, "calib/step_q_w": 0.738087737478411, "calib/step_q_w_n": 579.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 477.1640625, "completions/mean_terminated_length": 496.5609436035156, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.018133333333333335, "grad_norm": 0.010662867687642574, "learning_rate": 4.25e-06, "loss": 0.1355, "num_tokens": 4489061.0, "reward": 0.34375, "reward_std": 0.382437139749527, "rewards/accuracy_reward_step": 0.12109375, "rewards/format_reward_step": 0.4453125, "step": 17 }, { "calib/answer_extract_rate": 0.60546875, "calib/auroc": 0.500975800156128, "calib/avg_num_step_conf": 2.6796875, "calib/ece": 0.7496902097902098, "calib/final_conf_rate": 0.55859375, "calib/format_rate": 0.46875, "calib/frac_conf_gt_0.9": 0.7482517482517482, "calib/gap": 0.018005581576893137, "calib/mean_conf": 0.8965433566433565, "calib/mu_c": 0.9119047619047619, "calib/mu_w": 0.8938991803278687, "calib/nonempty_final_conf_rate": 0.55859375, "calib/nonempty_reasoning_rate": 0.70703125, "calib/nonempty_step_conf_rate": 0.60546875, "calib/pce": 0.7496902097902098, "calib/std_conf": 0.18018304351173703, "calib/step_conf_rate": 0.60546875, "calib/step_q_c": 0.7883529411764707, "calib/step_q_c_n": 85.0, "calib/step_q_gap": 0.06148322403836748, "calib/step_q_w": 0.7268697171381032, "calib/step_q_w_n": 601.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2758.0, "completions/max_terminated_length": 2758.0, "completions/mean_length": 399.8984375, "completions/mean_terminated_length": 407.86456298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.0192, "grad_norm": 0.011393941938877106, "learning_rate": 4.5e-06, "loss": 0.1707, "num_tokens": 4702155.0, "reward": 0.3203125, "reward_std": 0.3561851978302002, "rewards/accuracy_reward_step": 0.0859375, "rewards/format_reward_step": 0.46875, "step": 18 }, { "calib/answer_extract_rate": 0.82421875, "calib/auroc": 0.47573930519666957, "calib/avg_num_step_conf": 4.03125, "calib/ece": 0.6919258536585366, "calib/final_conf_rate": 0.80078125, "calib/format_rate": 0.74609375, "calib/frac_conf_gt_0.9": 0.7170731707317073, "calib/gap": -0.020047459086993902, "calib/mean_conf": 0.876191219512195, "calib/mu_c": 0.8603488372093022, "calib/mu_w": 0.8803962962962961, "calib/nonempty_final_conf_rate": 0.80078125, "calib/nonempty_reasoning_rate": 0.92578125, "calib/nonempty_step_conf_rate": 0.8828125, "calib/pce": 0.679180487804878, "calib/std_conf": 0.21539969816989882, "calib/step_conf_rate": 0.8828125, "calib/step_q_c": 0.7497914572864323, "calib/step_q_c_n": 199.0, "calib/step_q_gap": 0.020316867450497877, "calib/step_q_w": 0.7294745898359344, "calib/step_q_w_n": 833.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2477.0, "completions/max_terminated_length": 2477.0, "completions/mean_length": 307.09375, "completions/mean_terminated_length": 307.09375, "completions/min_length": 8.0, "completions/min_terminated_length": 8.0, "epoch": 0.020266666666666665, "grad_norm": 0.012713734991848469, "learning_rate": 4.75e-06, "loss": 0.094, "num_tokens": 4885531.0, "reward": 0.544921875, "reward_std": 0.4169868230819702, "rewards/accuracy_reward_step": 0.171875, "rewards/format_reward_step": 0.74609375, "step": 19 }, { "calib/answer_extract_rate": 0.90625, "calib/auroc": 0.46930579460699934, "calib/avg_num_step_conf": 4.18359375, "calib/ece": 0.6262445414847163, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.8359375, "calib/frac_conf_gt_0.9": 0.7379912663755459, "calib/gap": 0.02309523809523817, "calib/mean_conf": 0.901353711790393, "calib/mu_c": 0.9180952380952381, "calib/mu_w": 0.8949999999999999, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.6262445414847163, "calib/std_conf": 0.15072576024234574, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.7587795275590552, "calib/step_q_c_n": 254.0, "calib/step_q_gap": 0.05256165730201723, "calib/step_q_w": 0.706217870257038, "calib/step_q_w_n": 817.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 258.875, "completions/mean_terminated_length": 260.91339111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.021333333333333333, "grad_norm": 0.012193223461508751, "learning_rate": 5e-06, "loss": 0.0445, "num_tokens": 5056675.0, "reward": 0.6640625, "reward_std": 0.4222579300403595, "rewards/accuracy_reward_step": 0.24609375, "rewards/format_reward_step": 0.8359375, "step": 20 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4590178571428572, "calib/avg_num_step_conf": 4.671875, "calib/ece": 0.6457531380753138, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.7782426778242678, "calib/gap": 0.016340178571428554, "calib/mean_conf": 0.9060041841004185, "calib/mu_c": 0.91796875, "calib/mu_w": 0.9016285714285714, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.6419874476987447, "calib/std_conf": 0.16417677221456925, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7365342960288809, "calib/step_q_c_n": 277.0, "calib/step_q_gap": 0.0021822829712094816, "calib/step_q_w": 0.7343520130576714, "calib/step_q_w_n": 919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 257.77734375, "completions/mean_terminated_length": 258.7882385253906, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0224, "grad_norm": 0.009520730935037136, "learning_rate": 4.9722222222222224e-06, "loss": 0.1137, "num_tokens": 5225626.0, "reward": 0.71484375, "reward_std": 0.38410425186157227, "rewards/accuracy_reward_step": 0.25390625, "rewards/format_reward_step": 0.921875, "step": 21 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.549842877094972, "calib/avg_num_step_conf": 4.54296875, "calib/ece": 0.6523456790123459, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.041869762569832436, "calib/mean_conf": 0.9157201646090536, "calib/mu_c": 0.9465625, "calib/mu_w": 0.9046927374301675, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6523456790123459, "calib/std_conf": 0.1492443100388931, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7564852583870968, "calib/step_q_c_n": 310.0, "calib/step_q_gap": 0.00028174138826908557, "calib/step_q_w": 0.7562035169988277, "calib/step_q_w_n": 853.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 594.0, "completions/max_terminated_length": 594.0, "completions/mean_length": 235.8671875, "completions/mean_terminated_length": 236.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.023466666666666667, "grad_norm": 0.007855575531721115, "learning_rate": 4.944444444444445e-06, "loss": -0.0545, "num_tokens": 5387824.0, "reward": 0.72265625, "reward_std": 0.3997213840484619, "rewards/accuracy_reward_step": 0.25390625, "rewards/format_reward_step": 0.9375, "step": 22 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.480857706210976, "calib/avg_num_step_conf": 4.28125, "calib/ece": 0.6531983805668015, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.7732793522267206, "calib/gap": -0.009895662175484654, "calib/mean_conf": 0.9220242914979757, "calib/mu_c": 0.9148529411764705, "calib/mu_w": 0.9247486033519552, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.6499595141700404, "calib/std_conf": 0.12176814522402662, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7559437086092715, "calib/step_q_c_n": 302.0, "calib/step_q_gap": -0.030645992946003786, "calib/step_q_w": 0.7865897015552753, "calib/step_q_w_n": 793.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1973.0, "completions/max_terminated_length": 1973.0, "completions/mean_length": 247.96875, "completions/mean_terminated_length": 247.96875, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.024533333333333334, "grad_norm": 0.008924001827836037, "learning_rate": 4.9166666666666665e-06, "loss": 0.0309, "num_tokens": 5555240.0, "reward": 0.734375, "reward_std": 0.3484991192817688, "rewards/accuracy_reward_step": 0.26953125, "rewards/format_reward_step": 0.9296875, "step": 23 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.451530612244898, "calib/avg_num_step_conf": 4.7890625, "calib/ece": 0.709469387755102, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.8081632653061225, "calib/gap": -0.015066137566137394, "calib/mean_conf": 0.9255510204081633, "calib/mu_c": 0.9139285714285715, "calib/mu_w": 0.9289947089947089, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.7032244897959183, "calib/std_conf": 0.1260782169316851, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7920491803278689, "calib/step_q_c_n": 244.0, "calib/step_q_gap": 0.009168324930720173, "calib/step_q_w": 0.7828808553971487, "calib/step_q_w_n": 982.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2769.0, "completions/max_terminated_length": 2769.0, "completions/mean_length": 251.62109375, "completions/mean_terminated_length": 251.62109375, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.0256, "grad_norm": 0.006747386883944273, "learning_rate": 4.888888888888889e-06, "loss": 0.0257, "num_tokens": 5724167.0, "reward": 0.697265625, "reward_std": 0.2716612219810486, "rewards/accuracy_reward_step": 0.2265625, "rewards/format_reward_step": 0.94140625, "step": 24 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48230574324324316, "calib/avg_num_step_conf": 4.828125, "calib/ece": 0.6783534136546185, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.8032128514056225, "calib/gap": 0.007891047297297282, "calib/mean_conf": 0.9327309236947792, "calib/mu_c": 0.93859375, "calib/mu_w": 0.9307027027027027, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6770281124497992, "calib/std_conf": 0.1078587312700969, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8123549488054608, "calib/step_q_c_n": 293.0, "calib/step_q_gap": 0.018632785496871196, "calib/step_q_w": 0.7937221633085896, "calib/step_q_w_n": 943.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 562.0, "completions/max_terminated_length": 562.0, "completions/mean_length": 246.1328125, "completions/mean_terminated_length": 247.09805297851562, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.02666666666666667, "grad_norm": 0.0071069239638745785, "learning_rate": 4.861111111111111e-06, "loss": 0.0321, "num_tokens": 5890401.0, "reward": 0.736328125, "reward_std": 0.33219289779663086, "rewards/accuracy_reward_step": 0.25390625, "rewards/format_reward_step": 0.96484375, "step": 25 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5279473317056156, "calib/avg_num_step_conf": 5.06640625, "calib/ece": 0.694979919678715, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8313253012048193, "calib/gap": 0.0028749564004187933, "calib/mean_conf": 0.9319277108433734, "calib/mu_c": 0.9340983606557379, "calib/mu_w": 0.9312234042553191, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6909638554216868, "calib/std_conf": 0.11348220848109943, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7745049668874173, "calib/step_q_c_n": 302.0, "calib/step_q_gap": -0.023032922559819036, "calib/step_q_w": 0.7975378894472364, "calib/step_q_w_n": 995.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2198.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 269.98046875, "completions/mean_terminated_length": 269.98046875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.027733333333333332, "grad_norm": 0.006924426648765802, "learning_rate": 4.833333333333333e-06, "loss": 0.0364, "num_tokens": 6064756.0, "reward": 0.72265625, "reward_std": 0.3338688611984253, "rewards/accuracy_reward_step": 0.2421875, "rewards/format_reward_step": 0.9609375, "step": 26 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48424116424116426, "calib/avg_num_step_conf": 5.109375, "calib/ece": 0.683996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.848, "calib/gap": -0.011803326403326597, "calib/mean_conf": 0.9371959999999999, "calib/mu_c": 0.9284615384615383, "calib/mu_w": 0.9402648648648649, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6805960000000001, "calib/std_conf": 0.09535859470441037, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8074, "calib/step_q_c_n": 330.0, "calib/step_q_gap": -0.007578169734151263, "calib/step_q_w": 0.8149781697341513, "calib/step_q_w_n": 978.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 662.0, "completions/max_terminated_length": 662.0, "completions/mean_length": 252.43359375, "completions/mean_terminated_length": 253.4235382080078, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.0288, "grad_norm": 0.007143011782318354, "learning_rate": 4.805555555555556e-06, "loss": 0.002, "num_tokens": 6234595.0, "reward": 0.736328125, "reward_std": 0.360126793384552, "rewards/accuracy_reward_step": 0.25390625, "rewards/format_reward_step": 0.96484375, "step": 27 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47192028985507245, "calib/avg_num_step_conf": 5.17578125, "calib/ece": 0.6729446640316206, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8774703557312253, "calib/gap": 0.0005344202898549533, "calib/mean_conf": 0.9381620553359683, "calib/mu_c": 0.9385507246376811, "calib/mu_w": 0.9380163043478261, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6691897233201581, "calib/std_conf": 0.10872182940265937, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8142175226586104, "calib/step_q_c_n": 331.0, "calib/step_q_gap": -0.015080264061711701, "calib/step_q_w": 0.8292977867203221, "calib/step_q_w_n": 994.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 888.0, "completions/max_terminated_length": 888.0, "completions/mean_length": 278.08203125, "completions/mean_terminated_length": 279.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.029866666666666666, "grad_norm": 0.006403841078281403, "learning_rate": 4.777777777777778e-06, "loss": 0.0181, "num_tokens": 6412728.0, "reward": 0.76171875, "reward_std": 0.33537590503692627, "rewards/accuracy_reward_step": 0.26953125, "rewards/format_reward_step": 0.984375, "step": 28 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47236394557823125, "calib/avg_num_step_conf": 5.5546875, "calib/ece": 0.7100937500000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8828125, "calib/gap": 0.0041292517006803475, "calib/mean_conf": 0.9401718750000001, "calib/mu_c": 0.9433333333333335, "calib/mu_w": 0.9392040816326531, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.7079453125000001, "calib/std_conf": 0.11483341258747112, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8137987012987012, "calib/step_q_c_n": 308.0, "calib/step_q_gap": -0.00451997015551775, "calib/step_q_w": 0.818318671454219, "calib/step_q_w_n": 1114.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 893.0, "completions/max_terminated_length": 893.0, "completions/mean_length": 298.6484375, "completions/mean_terminated_length": 299.8196105957031, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.030933333333333334, "grad_norm": 0.005184306297451258, "learning_rate": 4.75e-06, "loss": -0.0327, "num_tokens": 6596310.0, "reward": 0.73046875, "reward_std": 0.2800149917602539, "rewards/accuracy_reward_step": 0.234375, "rewards/format_reward_step": 0.9921875, "step": 29 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.46903431763766956, "calib/avg_num_step_conf": 5.5546875, "calib/ece": 0.6655822891566265, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.8554216867469879, "calib/gap": 0.004382139664804496, "calib/mean_conf": 0.9467067871485944, "calib/mu_c": 0.9498570000000001, "calib/mu_w": 0.9454748603351956, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6655822891566265, "calib/std_conf": 0.07900735903474725, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.832691556122449, "calib/step_q_c_n": 392.0, "calib/step_q_gap": 0.01874835223895388, "calib/step_q_w": 0.8139432038834952, "calib/step_q_w_n": 1030.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2168.0, "completions/max_terminated_length": 2168.0, "completions/mean_length": 309.96484375, "completions/mean_terminated_length": 309.96484375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.032, "grad_norm": 0.006838866975158453, "learning_rate": 4.722222222222222e-06, "loss": 0.0511, "num_tokens": 6782645.0, "reward": 0.76171875, "reward_std": 0.3281659185886383, "rewards/accuracy_reward_step": 0.27734375, "rewards/format_reward_step": 0.96875, "step": 30 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49033649698015536, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.6988406374501991, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8725099601593626, "calib/gap": 0.013340897325280432, "calib/mean_conf": 0.9418685258964143, "calib/mu_c": 0.9519672131147541, "calib/mu_w": 0.9386263157894736, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6988406374501991, "calib/std_conf": 0.09511378510473928, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8257632398753896, "calib/step_q_c_n": 321.0, "calib/step_q_gap": 0.025200228418761128, "calib/step_q_w": 0.8005630114566284, "calib/step_q_w_n": 1222.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2094.0, "completions/max_terminated_length": 2094.0, "completions/mean_length": 323.72265625, "completions/mean_terminated_length": 323.72265625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.03306666666666667, "grad_norm": 0.0056033809669315815, "learning_rate": 4.694444444444445e-06, "loss": 0.0189, "num_tokens": 6971430.0, "reward": 0.728515625, "reward_std": 0.3097946345806122, "rewards/accuracy_reward_step": 0.23828125, "rewards/format_reward_step": 0.98046875, "step": 31 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49714640198511173, "calib/avg_num_step_conf": 5.9921875, "calib/ece": 0.6846215139442231, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8565737051792829, "calib/gap": 0.006164598842018254, "calib/mean_conf": 0.9435856573705178, "calib/mu_c": 0.9481538461538461, "calib/mu_w": 0.9419892473118279, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6846215139442231, "calib/std_conf": 0.08074782220162297, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8077514792899407, "calib/step_q_c_n": 338.0, "calib/step_q_gap": 0.008119372266529257, "calib/step_q_w": 0.7996321070234115, "calib/step_q_w_n": 1196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1812.0, "completions/max_terminated_length": 1812.0, "completions/mean_length": 316.3203125, "completions/mean_terminated_length": 316.3203125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.034133333333333335, "grad_norm": 0.005981651600450277, "learning_rate": 4.666666666666667e-06, "loss": 0.0534, "num_tokens": 7159112.0, "reward": 0.740234375, "reward_std": 0.29884395003318787, "rewards/accuracy_reward_step": 0.25390625, "rewards/format_reward_step": 0.97265625, "step": 32 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49311424100156487, "calib/avg_num_step_conf": 6.609375, "calib/ece": 0.6779482071713147, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9243027888446215, "calib/gap": -0.00025508607198776456, "calib/mean_conf": 0.9608167330677292, "calib/mu_c": 0.9606338028169011, "calib/mu_w": 0.9608888888888889, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6779482071713147, "calib/std_conf": 0.0415897996286572, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8484463414634147, "calib/step_q_c_n": 492.0, "calib/step_q_gap": 0.015152258130081364, "calib/step_q_w": 0.8332940833333333, "calib/step_q_w_n": 1200.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1465.0, "completions/max_terminated_length": 1465.0, "completions/mean_length": 356.19140625, "completions/mean_terminated_length": 356.19140625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.0352, "grad_norm": 0.0054481374099850655, "learning_rate": 4.638888888888889e-06, "loss": 0.0144, "num_tokens": 7357169.0, "reward": 0.771484375, "reward_std": 0.2949320673942566, "rewards/accuracy_reward_step": 0.28125, "rewards/format_reward_step": 0.98046875, "step": 33 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46460021134202184, "calib/avg_num_step_conf": 6.3359375, "calib/ece": 0.6082500000000001, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": -0.0046491722437478344, "calib/mean_conf": 0.9455515873015873, "calib/mu_c": 0.942470588235294, "calib/mu_w": 0.9471197604790418, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6082500000000001, "calib/std_conf": 0.042571803460288424, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8041634980988593, "calib/step_q_c_n": 526.0, "calib/step_q_gap": 0.007002001748494302, "calib/step_q_w": 0.797161496350365, "calib/step_q_w_n": 1096.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2219.0, "completions/max_terminated_length": 2219.0, "completions/mean_length": 333.64453125, "completions/mean_terminated_length": 333.64453125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.03626666666666667, "grad_norm": 0.005749785806983709, "learning_rate": 4.611111111111112e-06, "loss": -0.0071, "num_tokens": 7547694.0, "reward": 0.822265625, "reward_std": 0.2967801094055176, "rewards/accuracy_reward_step": 0.33203125, "rewards/format_reward_step": 0.98046875, "step": 34 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5824603521024297, "calib/avg_num_step_conf": 7.16796875, "calib/ece": 0.6306877470355731, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.8458498023715415, "calib/gap": 0.01081652844463854, "calib/mean_conf": 0.9429407114624507, "calib/mu_c": 0.950379746835443, "calib/mu_w": 0.9395632183908045, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6306877470355731, "calib/std_conf": 0.07253234553563628, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8060387323943662, "calib/step_q_c_n": 568.0, "calib/step_q_gap": -0.004912806674300163, "calib/step_q_w": 0.8109515390686663, "calib/step_q_w_n": 1267.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 927.0, "completions/max_terminated_length": 927.0, "completions/mean_length": 384.0, "completions/mean_terminated_length": 385.50592041015625, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.037333333333333336, "grad_norm": 0.005311410874128342, "learning_rate": 4.583333333333333e-06, "loss": -0.0117, "num_tokens": 7755254.0, "reward": 0.802734375, "reward_std": 0.302001416683197, "rewards/accuracy_reward_step": 0.30859375, "rewards/format_reward_step": 0.98828125, "step": 35 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4753882685149532, "calib/avg_num_step_conf": 6.94140625, "calib/ece": 0.3556723320158103, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.7312252964426877, "calib/gap": -0.004662835322808423, "calib/mean_conf": 0.9214501976284585, "calib/mu_c": 0.9194965986394558, "calib/mu_w": 0.9241594339622642, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34804743083003953, "calib/std_conf": 0.10258914435497857, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7864002205071663, "calib/step_q_c_n": 907.0, "calib/step_q_gap": 0.007645622806016883, "calib/step_q_w": 0.7787545977011494, "calib/step_q_w_n": 870.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2170.0, "completions/max_terminated_length": 2170.0, "completions/mean_length": 368.97265625, "completions/mean_terminated_length": 370.41961669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.0384, "grad_norm": 0.006220624782145023, "learning_rate": 4.555555555555556e-06, "loss": 0.0187, "num_tokens": 7952423.0, "reward": 1.06640625, "reward_std": 0.3570103645324707, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.984375, "step": 36 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49999999999999994, "calib/avg_num_step_conf": 6.84765625, "calib/ece": 0.4950980392156864, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6823529411764706, "calib/gap": 0.010418034857287206, "calib/mean_conf": 0.9118039215686274, "calib/mu_c": 0.9178504672897198, "calib/mu_w": 0.9074324324324325, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.49364705882352955, "calib/std_conf": 0.11364151057513168, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7692052023121386, "calib/step_q_c_n": 692.0, "calib/step_q_gap": 0.0002136848757579024, "calib/step_q_w": 0.7689915174363807, "calib/step_q_w_n": 1061.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2760.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 382.6015625, "completions/mean_terminated_length": 382.6015625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.039466666666666664, "grad_norm": 0.006087584886699915, "learning_rate": 4.527777777777778e-06, "loss": -0.0051, "num_tokens": 8157465.0, "reward": 0.912109375, "reward_std": 0.297029584646225, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.98828125, "step": 37 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4294117647058824, "calib/avg_num_step_conf": 7.4453125, "calib/ece": 0.5149603174603173, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6507936507936508, "calib/gap": -0.015439215686274377, "calib/mean_conf": 0.9161507936507937, "calib/mu_c": 0.9069607843137255, "calib/mu_w": 0.9223999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5131746031746031, "calib/std_conf": 0.06709287184009244, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7816969696969697, "calib/step_q_c_n": 660.0, "calib/step_q_gap": 0.01830414465684127, "calib/step_q_w": 0.7633928250401284, "calib/step_q_w_n": 1246.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2449.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 397.84765625, "completions/mean_terminated_length": 397.84765625, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.04053333333333333, "grad_norm": 0.006814414635300636, "learning_rate": 4.5e-06, "loss": 0.0884, "num_tokens": 8366202.0, "reward": 0.890625, "reward_std": 0.3247237503528595, "rewards/accuracy_reward_step": 0.3984375, "rewards/format_reward_step": 0.984375, "step": 38 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45514705882352946, "calib/avg_num_step_conf": 7.30078125, "calib/ece": 0.5707874015748031, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6062992125984252, "calib/gap": -0.009245098039215582, "calib/mean_conf": 0.8953543307086614, "calib/mu_c": 0.8891666666666667, "calib/mu_w": 0.8984117647058822, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5677165354330709, "calib/std_conf": 0.11664803561795813, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7312433392539964, "calib/step_q_c_n": 563.0, "calib/step_q_gap": -0.03334318448260387, "calib/step_q_w": 0.7645865237366003, "calib/step_q_w_n": 1306.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1102.0, "completions/max_terminated_length": 1102.0, "completions/mean_length": 388.00390625, "completions/mean_terminated_length": 389.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.0416, "grad_norm": 0.005457035731524229, "learning_rate": 4.472222222222223e-06, "loss": 0.0047, "num_tokens": 8571619.0, "reward": 0.828125, "reward_std": 0.2757005989551544, "rewards/accuracy_reward_step": 0.33203125, "rewards/format_reward_step": 0.9921875, "step": 39 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.45176423946081673, "calib/avg_num_step_conf": 8.60546875, "calib/ece": 0.5330588235294118, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.6431372549019608, "calib/gap": -0.004187260473106802, "calib/mean_conf": 0.901686274509804, "calib/mu_c": 0.8990425531914894, "calib/mu_w": 0.9032298136645962, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5330588235294118, "calib/std_conf": 0.0936152933283032, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7234317862165963, "calib/step_q_c_n": 711.0, "calib/step_q_gap": -0.06037699394426166, "calib/step_q_w": 0.783808780160858, "calib/step_q_w_n": 1492.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1915.0, "completions/max_terminated_length": 1915.0, "completions/mean_length": 455.390625, "completions/mean_terminated_length": 455.390625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.042666666666666665, "grad_norm": 0.005192178767174482, "learning_rate": 4.444444444444444e-06, "loss": 0.0217, "num_tokens": 8794959.0, "reward": 0.865234375, "reward_std": 0.2953948974609375, "rewards/accuracy_reward_step": 0.3671875, "rewards/format_reward_step": 0.99609375, "step": 40 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49226804123711343, "calib/avg_num_step_conf": 7.61328125, "calib/ece": 0.2926693227091633, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5856573705179283, "calib/gap": 0.00016668898112182085, "calib/mean_conf": 0.8964940239043824, "calib/mu_c": 0.8965584415584416, "calib/mu_w": 0.8963917525773197, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.287808764940239, "calib/std_conf": 0.09422603029419956, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7469074421513446, "calib/step_q_c_n": 1066.0, "calib/step_q_gap": -0.01468372432657139, "calib/step_q_w": 0.761591166477916, "calib/step_q_w_n": 883.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2427.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 399.8828125, "completions/mean_terminated_length": 401.4510192871094, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.04373333333333333, "grad_norm": 0.005844591651111841, "learning_rate": 4.416666666666667e-06, "loss": 0.0438, "num_tokens": 9004577.0, "reward": 1.091796875, "reward_std": 0.3158378601074219, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.98046875, "step": 41 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5313282442748092, "calib/avg_num_step_conf": 7.1171875, "calib/ece": 0.40785156250000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.5859375, "calib/gap": 0.01725007633587794, "calib/mean_conf": 0.8961328125000001, "calib/mu_c": 0.90496, "calib/mu_w": 0.887709923664122, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40785156250000004, "calib/std_conf": 0.09651488478903056, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7085758293838863, "calib/step_q_c_n": 844.0, "calib/step_q_gap": -0.00878613380629778, "calib/step_q_w": 0.7173619631901841, "calib/step_q_w_n": 978.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1047.0, "completions/max_terminated_length": 1047.0, "completions/mean_length": 346.79296875, "completions/mean_terminated_length": 348.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.0448, "grad_norm": 0.005971704609692097, "learning_rate": 4.388888888888889e-06, "loss": 0.0239, "num_tokens": 9197724.0, "reward": 0.98828125, "reward_std": 0.28300461173057556, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 1.0, "step": 42 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5105181145305805, "calib/avg_num_step_conf": 7.57421875, "calib/ece": 0.5034387351778657, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.616600790513834, "calib/gap": 0.006023893000908664, "calib/mean_conf": 0.906600790513834, "calib/mu_c": 0.9101960784313724, "calib/mu_w": 0.9041721854304637, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5034387351778657, "calib/std_conf": 0.0662095332400494, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7505651558073655, "calib/step_q_c_n": 706.0, "calib/step_q_gap": 0.012503517526749164, "calib/step_q_w": 0.7380616382806163, "calib/step_q_w_n": 1233.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1910.0, "completions/max_terminated_length": 1910.0, "completions/mean_length": 402.4765625, "completions/mean_terminated_length": 404.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.04586666666666667, "grad_norm": 0.005978772882372141, "learning_rate": 4.361111111111112e-06, "loss": 0.0315, "num_tokens": 9405982.0, "reward": 0.888671875, "reward_std": 0.3324914872646332, "rewards/accuracy_reward_step": 0.3984375, "rewards/format_reward_step": 0.98046875, "step": 43 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4941358024691358, "calib/avg_num_step_conf": 8.42578125, "calib/ece": 0.5652380952380953, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6587301587301587, "calib/gap": -0.007876543209876519, "calib/mean_conf": 0.9070634920634921, "calib/mu_c": 0.902, "calib/mu_w": 0.9098765432098765, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5575793650793651, "calib/std_conf": 0.09409609627310389, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7202049780380673, "calib/step_q_c_n": 683.0, "calib/step_q_gap": 0.003715832854892187, "calib/step_q_w": 0.7164891451831751, "calib/step_q_w_n": 1474.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2495.0, "completions/max_terminated_length": 2495.0, "completions/mean_length": 462.9609375, "completions/mean_terminated_length": 462.9609375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.046933333333333334, "grad_norm": 0.005756228230893612, "learning_rate": 4.333333333333334e-06, "loss": 0.082, "num_tokens": 9630820.0, "reward": 0.84375, "reward_std": 0.30472099781036377, "rewards/accuracy_reward_step": 0.3515625, "rewards/format_reward_step": 0.984375, "step": 44 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5135653871177619, "calib/avg_num_step_conf": 8.78515625, "calib/ece": 0.5013147410358565, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.6972111553784861, "calib/gap": 0.0026278464541313884, "calib/mean_conf": 0.9187649402390439, "calib/mu_c": 0.9202830188679245, "calib/mu_w": 0.9176551724137931, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4988844621513943, "calib/std_conf": 0.07374763502114198, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7569809322033897, "calib/step_q_c_n": 944.0, "calib/step_q_gap": 0.004582464770439554, "calib/step_q_w": 0.7523984674329501, "calib/step_q_w_n": 1305.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2497.0, "completions/max_terminated_length": 2497.0, "completions/mean_length": 480.34765625, "completions/mean_terminated_length": 480.34765625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.048, "grad_norm": 0.005964437033981085, "learning_rate": 4.305555555555556e-06, "loss": 0.0476, "num_tokens": 9858837.0, "reward": 0.904296875, "reward_std": 0.36644038558006287, "rewards/accuracy_reward_step": 0.4140625, "rewards/format_reward_step": 0.98046875, "step": 45 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.476935914552737, "calib/avg_num_step_conf": 9.515625, "calib/ece": 0.5004048582995951, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.7813765182186235, "calib/gap": 5.6074766355163064e-05, "calib/mean_conf": 0.9220242914979755, "calib/mu_c": 0.9220560747663551, "calib/mu_w": 0.9219999999999999, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4946153846153846, "calib/std_conf": 0.09860952016211864, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7414906832298138, "calib/step_q_c_n": 805.0, "calib/step_q_gap": 0.011960027190573985, "calib/step_q_w": 0.7295306560392398, "calib/step_q_w_n": 1631.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2472.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 497.73046875, "completions/mean_terminated_length": 497.73046875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.04906666666666667, "grad_norm": 0.005311571527272463, "learning_rate": 4.277777777777778e-06, "loss": 0.0152, "num_tokens": 10091024.0, "reward": 0.8984375, "reward_std": 0.3058202266693115, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.9609375, "step": 46 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4700066137566138, "calib/avg_num_step_conf": 9.84765625, "calib/ece": 0.4472357723577235, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8048780487804879, "calib/gap": 0.005452380952380764, "calib/mean_conf": 0.935040650406504, "calib/mu_c": 0.9378333333333333, "calib/mu_w": 0.9323809523809525, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4472357723577235, "calib/std_conf": 0.08181750447551771, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.731946446961895, "calib/step_q_c_n": 971.0, "calib/step_q_gap": 0.007961930832862785, "calib/step_q_w": 0.7239845161290323, "calib/step_q_w_n": 1550.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2475.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 499.41015625, "completions/mean_terminated_length": 501.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.050133333333333335, "grad_norm": 0.005696811247617006, "learning_rate": 4.25e-06, "loss": 0.0391, "num_tokens": 10324849.0, "reward": 0.94921875, "reward_std": 0.3375682234764099, "rewards/accuracy_reward_step": 0.46875, "rewards/format_reward_step": 0.9609375, "step": 47 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46742485238862047, "calib/avg_num_step_conf": 7.88671875, "calib/ece": 0.5768110236220473, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.7716535433070866, "calib/gap": -0.012356414385400027, "calib/mean_conf": 0.9311417322834645, "calib/mu_c": 0.9232608695652172, "calib/mu_w": 0.9356172839506173, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5728740157480315, "calib/std_conf": 0.08088995386554051, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7187003058103976, "calib/step_q_c_n": 654.0, "calib/step_q_gap": 0.02775455800981097, "calib/step_q_w": 0.6909457478005866, "calib/step_q_w_n": 1364.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1657.0, "completions/max_terminated_length": 1657.0, "completions/mean_length": 421.19140625, "completions/mean_terminated_length": 421.19140625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.0512, "grad_norm": 0.006146811414510012, "learning_rate": 4.222222222222223e-06, "loss": 0.0513, "num_tokens": 10536362.0, "reward": 0.853515625, "reward_std": 0.3062730133533478, "rewards/accuracy_reward_step": 0.359375, "rewards/format_reward_step": 0.98828125, "step": 48 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5641134566368211, "calib/avg_num_step_conf": 8.5390625, "calib/ece": 0.5155599999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.84, "calib/gap": 0.012402457355728536, "calib/mean_conf": 0.94356, "calib/mu_c": 0.9506542056074767, "calib/mu_w": 0.9382517482517482, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5155599999999999, "calib/std_conf": 0.050791007078025134, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7094543297746145, "calib/step_q_c_n": 843.0, "calib/step_q_gap": -0.002788335899249872, "calib/step_q_w": 0.7122426656738644, "calib/step_q_w_n": 1343.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3051.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 465.72265625, "completions/mean_terminated_length": 465.72265625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.05226666666666667, "grad_norm": 0.0052592456340789795, "learning_rate": 4.194444444444445e-06, "loss": 0.0491, "num_tokens": 10760123.0, "reward": 0.90625, "reward_std": 0.24188709259033203, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.9765625, "step": 49 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5087383056133057, "calib/avg_num_step_conf": 9.5859375, "calib/ece": 0.360436507936508, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8849206349206349, "calib/gap": 0.009737525987526086, "calib/mean_conf": 0.9477380952380952, "calib/mu_c": 0.9517567567567569, "calib/mu_w": 0.9420192307692308, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.360436507936508, "calib/std_conf": 0.05085814048424047, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7369622448979591, "calib/step_q_c_n": 1274.0, "calib/step_q_gap": 0.015808724965813203, "calib/step_q_w": 0.7211535199321459, "calib/step_q_w_n": 1179.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 508.54296875, "completions/mean_terminated_length": 510.53729248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.05333333333333334, "grad_norm": 0.00552935479208827, "learning_rate": 4.166666666666667e-06, "loss": 0.0365, "num_tokens": 10995670.0, "reward": 1.068359375, "reward_std": 0.3270677626132965, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.98046875, "step": 50 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.38816475495307606, "calib/avg_num_step_conf": 9.22265625, "calib/ece": 0.5006425702811244, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8795180722891566, "calib/gap": -0.008662669447340954, "calib/mean_conf": 0.9436947791164659, "calib/mu_c": 0.9389285714285714, "calib/mu_w": 0.9475912408759124, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4972690763052207, "calib/std_conf": 0.08355554637588156, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7176267529665588, "calib/step_q_c_n": 927.0, "calib/step_q_gap": 0.03721873344075688, "calib/step_q_w": 0.6804080195258019, "calib/step_q_w_n": 1434.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2303.0, "completions/max_terminated_length": 2303.0, "completions/mean_length": 480.24609375, "completions/mean_terminated_length": 480.24609375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.0544, "grad_norm": 0.005861993413418531, "learning_rate": 4.138888888888889e-06, "loss": 0.0563, "num_tokens": 11227909.0, "reward": 0.923828125, "reward_std": 0.3243404030799866, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.97265625, "step": 51 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.493083885772565, "calib/avg_num_step_conf": 8.140625, "calib/ece": 0.35625984251968495, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.84251968503937, "calib/gap": 0.009509178990311118, "calib/mean_conf": 0.9389370078740158, "calib/mu_c": 0.9429054054054055, "calib/mu_w": 0.9333962264150943, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35625984251968495, "calib/std_conf": 0.056079702565777566, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7226880811496197, "calib/step_q_c_n": 1183.0, "calib/step_q_gap": 0.009813497353837297, "calib/step_q_w": 0.7128745837957824, "calib/step_q_w_n": 901.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2064.0, "completions/max_terminated_length": 2064.0, "completions/mean_length": 425.03125, "completions/mean_terminated_length": 425.03125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.055466666666666664, "grad_norm": 0.006341880187392235, "learning_rate": 4.111111111111111e-06, "loss": 0.0423, "num_tokens": 11444669.0, "reward": 1.07421875, "reward_std": 0.30103766918182373, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9921875, "step": 52 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5410216718266254, "calib/avg_num_step_conf": 9.28125, "calib/ece": 0.4079600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.888, "calib/gap": 0.009730392156862666, "calib/mean_conf": 0.95196, "calib/mu_c": 0.9563970588235293, "calib/mu_w": 0.9466666666666667, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4079600000000001, "calib/std_conf": 0.04056301763922403, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7413472333600641, "calib/step_q_c_n": 1247.0, "calib/step_q_gap": 0.07657221121657432, "calib/step_q_w": 0.6647750221434898, "calib/step_q_w_n": 1129.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1956.0, "completions/max_terminated_length": 1956.0, "completions/mean_length": 474.7265625, "completions/mean_terminated_length": 476.5882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.05653333333333333, "grad_norm": 0.005445745773613453, "learning_rate": 4.083333333333334e-06, "loss": 0.0153, "num_tokens": 11672023.0, "reward": 1.01953125, "reward_std": 0.26240503787994385, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.9765625, "step": 53 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4626398946675445, "calib/avg_num_step_conf": 8.015625, "calib/ece": 0.33367193675889323, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9090909090909091, "calib/gap": -0.004582290980908232, "calib/mean_conf": 0.9461620553359683, "calib/mu_c": 0.9443870967741937, "calib/mu_w": 0.948969387755102, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33359288537549403, "calib/std_conf": 0.03997402099700347, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7446314907872696, "calib/step_q_c_n": 1194.0, "calib/step_q_gap": 0.022119835775614716, "calib/step_q_w": 0.7225116550116549, "calib/step_q_w_n": 858.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2277.0, "completions/max_terminated_length": 2277.0, "completions/mean_length": 412.94921875, "completions/mean_terminated_length": 412.94921875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.0576, "grad_norm": 0.006437535397708416, "learning_rate": 4.055555555555556e-06, "loss": -0.0031, "num_tokens": 11883970.0, "reward": 1.103515625, "reward_std": 0.28321897983551025, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.98828125, "step": 54 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4576520601700458, "calib/avg_num_step_conf": 8.60546875, "calib/ece": 0.49542168674698794, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.8273092369477911, "calib/gap": 0.003570307390451277, "calib/mean_conf": 0.9371887550200803, "calib/mu_c": 0.9391818181818182, "calib/mu_w": 0.935611510791367, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.49542168674698794, "calib/std_conf": 0.060001075241839896, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7288025477707006, "calib/step_q_c_n": 785.0, "calib/step_q_gap": -0.007994913442275431, "calib/step_q_w": 0.7367974612129761, "calib/step_q_w_n": 1418.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 471.953125, "completions/mean_terminated_length": 471.953125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.058666666666666666, "grad_norm": 0.0072900657542049885, "learning_rate": 4.027777777777779e-06, "loss": 0.0964, "num_tokens": 12112614.0, "reward": 0.9140625, "reward_std": 0.32115888595581055, "rewards/accuracy_reward_step": 0.4296875, "rewards/format_reward_step": 0.96875, "step": 55 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47011436153408737, "calib/avg_num_step_conf": 10.078125, "calib/ece": 0.4711507936507937, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8849206349206349, "calib/gap": 0.0061875276426359704, "calib/mean_conf": 0.9433730158730159, "calib/mu_c": 0.9466386554621848, "calib/mu_w": 0.9404511278195489, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4711507936507937, "calib/std_conf": 0.06829072238540447, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7589653809971777, "calib/step_q_c_n": 1063.0, "calib/step_q_gap": -0.012593617025234871, "calib/step_q_w": 0.7715589980224126, "calib/step_q_w_n": 1517.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3019.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 527.3046875, "completions/mean_terminated_length": 527.3046875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.05973333333333333, "grad_norm": 0.006298091262578964, "learning_rate": 4.000000000000001e-06, "loss": 0.1152, "num_tokens": 12354444.0, "reward": 0.955078125, "reward_std": 0.3263026773929596, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.98046875, "step": 56 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48933038999264167, "calib/avg_num_step_conf": 9.140625, "calib/ece": 0.34363999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.908, "calib/gap": 0.007482105826477015, "calib/mean_conf": 0.9426, "calib/mu_c": 0.945562913907285, "calib/mu_w": 0.938080808080808, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3411199999999999, "calib/std_conf": 0.07157681188764975, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7741493055555555, "calib/step_q_c_n": 1152.0, "calib/step_q_gap": 0.02985469276094277, "calib/step_q_w": 0.7442946127946127, "calib/step_q_w_n": 1188.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2409.0, "completions/max_terminated_length": 2409.0, "completions/mean_length": 478.20703125, "completions/mean_terminated_length": 480.0823669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.0608, "grad_norm": 0.005231020972132683, "learning_rate": 3.972222222222223e-06, "loss": 0.0412, "num_tokens": 12583657.0, "reward": 1.076171875, "reward_std": 0.2724471092224121, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.97265625, "step": 57 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.569040650406504, "calib/avg_num_step_conf": 10.88671875, "calib/ece": 0.43987903225806435, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.8709677419354839, "calib/gap": 0.026591869918699107, "calib/mean_conf": 0.9335887096774192, "calib/mu_c": 0.9469918699186991, "calib/mu_w": 0.9204, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.43874999999999986, "calib/std_conf": 0.10877073492329829, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7726014109347442, "calib/step_q_c_n": 1134.0, "calib/step_q_gap": 0.019956825332808403, "calib/step_q_w": 0.7526445856019358, "calib/step_q_w_n": 1653.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 575.01171875, "completions/mean_terminated_length": 575.01171875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.06186666666666667, "grad_norm": 0.00627827737480402, "learning_rate": 3.944444444444445e-06, "loss": 0.0552, "num_tokens": 12837180.0, "reward": 0.96484375, "reward_std": 0.38476935029029846, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.96875, "step": 58 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4412151170066325, "calib/avg_num_step_conf": 9.04296875, "calib/ece": 0.4275098814229249, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.8893280632411067, "calib/gap": -0.0067269428106621065, "calib/mean_conf": 0.9433201581027667, "calib/mu_c": 0.9400763358778625, "calib/mu_w": 0.9468032786885247, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4265217391304348, "calib/std_conf": 0.0421746353906081, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7586098930481283, "calib/step_q_c_n": 1122.0, "calib/step_q_gap": 0.011274268572017743, "calib/step_q_w": 0.7473356244761106, "calib/step_q_w_n": 1193.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 473.15234375, "completions/mean_terminated_length": 475.00787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.06293333333333333, "grad_norm": 0.006247291341423988, "learning_rate": 3.916666666666667e-06, "loss": 0.0177, "num_tokens": 13064555.0, "reward": 1.009765625, "reward_std": 0.3327410817146301, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.98828125, "step": 59 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.411923076923077, "calib/avg_num_step_conf": 8.53515625, "calib/ece": 0.45959999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.804, "calib/gap": -0.013160256410256421, "calib/mean_conf": 0.93376, "calib/mu_c": 0.9269166666666666, "calib/mu_w": 0.940076923076923, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.45668, "calib/std_conf": 0.051360124610440726, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.752872340425532, "calib/step_q_c_n": 846.0, "calib/step_q_gap": 0.03690074102343932, "calib/step_q_w": 0.7159715994020927, "calib/step_q_w_n": 1338.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2456.0, "completions/max_terminated_length": 2456.0, "completions/mean_length": 473.87890625, "completions/mean_terminated_length": 473.87890625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.064, "grad_norm": 0.006815559696406126, "learning_rate": 3.88888888888889e-06, "loss": 0.0897, "num_tokens": 13294724.0, "reward": 0.955078125, "reward_std": 0.3364248275756836, "rewards/accuracy_reward_step": 0.46875, "rewards/format_reward_step": 0.97265625, "step": 60 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49205002513826046, "calib/avg_num_step_conf": 7.77734375, "calib/ece": 0.39075098814229225, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.782608695652174, "calib/gap": 0.010588235294117676, "calib/mean_conf": 0.9256916996047431, "calib/mu_c": 0.9305882352941177, "calib/mu_w": 0.92, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3894466403162053, "calib/std_conf": 0.09515961113404409, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7580360465116279, "calib/step_q_c_n": 946.0, "calib/step_q_gap": 0.04087815177478571, "calib/step_q_w": 0.7171578947368422, "calib/step_q_w_n": 1045.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2257.0, "completions/max_terminated_length": 2257.0, "completions/mean_length": 405.0078125, "completions/mean_terminated_length": 405.0078125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.06506666666666666, "grad_norm": 0.006788547150790691, "learning_rate": 3.861111111111112e-06, "loss": 0.017, "num_tokens": 13502470.0, "reward": 1.0234375, "reward_std": 0.2872949242591858, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.984375, "step": 61 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47897445553254253, "calib/avg_num_step_conf": 8.375, "calib/ece": 0.4763779527559056, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6968503937007874, "calib/gap": -0.0037042615954308022, "calib/mean_conf": 0.9212598425196851, "calib/mu_c": 0.9192035398230088, "calib/mu_w": 0.9229078014184396, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4763779527559056, "calib/std_conf": 0.06599919023583715, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7541038318912237, "calib/step_q_c_n": 809.0, "calib/step_q_gap": -0.003176692453345731, "calib/step_q_w": 0.7572805243445694, "calib/step_q_w_n": 1335.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2091.0, "completions/max_terminated_length": 2091.0, "completions/mean_length": 447.69140625, "completions/mean_terminated_length": 449.44708251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.06613333333333334, "grad_norm": 0.005980294197797775, "learning_rate": 3.833333333333334e-06, "loss": 0.0309, "num_tokens": 13724159.0, "reward": 0.935546875, "reward_std": 0.3506472110748291, "rewards/accuracy_reward_step": 0.44140625, "rewards/format_reward_step": 0.98828125, "step": 62 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4617628205128205, "calib/avg_num_step_conf": 8.5546875, "calib/ece": 0.43656000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.7, "calib/gap": -0.0066858974358974255, "calib/mean_conf": 0.9165599999999999, "calib/mu_c": 0.9130833333333332, "calib/mu_w": 0.9197692307692307, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43656000000000006, "calib/std_conf": 0.06672455619934838, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7589250814332248, "calib/step_q_c_n": 921.0, "calib/step_q_gap": 0.014565585767346123, "calib/step_q_w": 0.7443594956658787, "calib/step_q_w_n": 1269.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 508.91796875, "completions/mean_terminated_length": 508.91796875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.0672, "grad_norm": 0.006933415308594704, "learning_rate": 3.8055555555555556e-06, "loss": 0.1136, "num_tokens": 13963082.0, "reward": 0.95703125, "reward_std": 0.38261866569519043, "rewards/accuracy_reward_step": 0.46875, "rewards/format_reward_step": 0.9765625, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.493006993006993, "calib/avg_num_step_conf": 6.98828125, "calib/ece": 0.36474308300395264, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6007905138339921, "calib/gap": -0.012349650349650299, "calib/mean_conf": 0.8968379446640315, "calib/mu_c": 0.8914685314685316, "calib/mu_w": 0.9038181818181819, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3481818181818182, "calib/std_conf": 0.11424040866203894, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7302215189873418, "calib/step_q_c_n": 948.0, "calib/step_q_gap": 0.011339235990909025, "calib/step_q_w": 0.7188822829964328, "calib/step_q_w_n": 841.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2457.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 404.09375, "completions/mean_terminated_length": 404.09375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.06826666666666667, "grad_norm": 0.006458755116909742, "learning_rate": 3.777777777777778e-06, "loss": 0.0435, "num_tokens": 14170306.0, "reward": 1.052734375, "reward_std": 0.2793656885623932, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.98828125, "step": 64 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4552754435107376, "calib/avg_num_step_conf": 6.6484375, "calib/ece": 0.3721259842519685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.5511811023622047, "calib/gap": -0.005517584811702503, "calib/mean_conf": 0.9036220472440945, "calib/mu_c": 0.901037037037037, "calib/mu_w": 0.9065546218487395, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3721259842519685, "calib/std_conf": 0.06376839603453004, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7453848684210526, "calib/step_q_c_n": 912.0, "calib/step_q_gap": 0.01815702031978672, "calib/step_q_w": 0.7272278481012658, "calib/step_q_w_n": 790.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1137.0, "completions/max_terminated_length": 1137.0, "completions/mean_length": 365.46484375, "completions/mean_terminated_length": 365.46484375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.06933333333333333, "grad_norm": 0.006170464679598808, "learning_rate": 3.7500000000000005e-06, "loss": -0.0123, "num_tokens": 14368889.0, "reward": 1.0234375, "reward_std": 0.22531628608703613, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.9921875, "step": 65 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4909408773045136, "calib/avg_num_step_conf": 8.23046875, "calib/ece": 0.48683794466403163, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6245059288537549, "calib/gap": -0.00655244755244766, "calib/mean_conf": 0.913794466403162, "calib/mu_c": 0.9100909090909091, "calib/mu_w": 0.9166433566433567, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4829249011857707, "calib/std_conf": 0.061488689542966525, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7438579099545641, "calib/step_q_c_n": 807.0, "calib/step_q_gap": 0.09218137149302574, "calib/step_q_w": 0.6516765384615384, "calib/step_q_w_n": 1300.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2443.0, "completions/max_terminated_length": 2443.0, "completions/mean_length": 483.0546875, "completions/mean_terminated_length": 483.0546875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.0704, "grad_norm": 0.006618858780711889, "learning_rate": 3.7222222222222225e-06, "loss": 0.0437, "num_tokens": 14598903.0, "reward": 0.923828125, "reward_std": 0.28461480140686035, "rewards/accuracy_reward_step": 0.4296875, "rewards/format_reward_step": 0.98828125, "step": 66 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5161099339399228, "calib/avg_num_step_conf": 7.0234375, "calib/ece": 0.3441568627450981, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5843137254901961, "calib/gap": 0.014226598529228585, "calib/mean_conf": 0.9010196078431372, "calib/mu_c": 0.9073239436619719, "calib/mu_w": 0.8930973451327433, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3441568627450981, "calib/std_conf": 0.0878977771242005, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7455732484076433, "calib/step_q_c_n": 942.0, "calib/step_q_gap": 0.039343809155306775, "calib/step_q_w": 0.7062294392523365, "calib/step_q_w_n": 856.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1691.0, "completions/max_terminated_length": 1691.0, "completions/mean_length": 428.17578125, "completions/mean_terminated_length": 429.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.07146666666666666, "grad_norm": 0.0060368194244802, "learning_rate": 3.694444444444445e-06, "loss": 0.0359, "num_tokens": 14813524.0, "reward": 1.052734375, "reward_std": 0.2020040899515152, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.99609375, "step": 67 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4300887664524028, "calib/avg_num_step_conf": 7.0546875, "calib/ece": 0.37574218750000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.578125, "calib/gap": -0.008566268748086858, "calib/mean_conf": 0.9030859375, "calib/mu_c": 0.8990370370370371, "calib/mu_w": 0.9076033057851239, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37574218750000005, "calib/std_conf": 0.06972776609605454, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7468965517241378, "calib/step_q_c_n": 870.0, "calib/step_q_gap": 0.03230253463012056, "calib/step_q_w": 0.7145940170940173, "calib/step_q_w_n": 936.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 396.6875, "completions/mean_terminated_length": 398.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.07253333333333334, "grad_norm": 0.006941006053239107, "learning_rate": 3.6666666666666666e-06, "loss": -0.0138, "num_tokens": 15019164.0, "reward": 1.025390625, "reward_std": 0.26476049423217773, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.99609375, "step": 68 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.41418971310416125, "calib/avg_num_step_conf": 7.546875, "calib/ece": 0.5014285714285714, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6309523809523809, "calib/gap": -0.029346084259498584, "calib/mean_conf": 0.9038888888888889, "calib/mu_c": 0.8868867924528301, "calib/mu_w": 0.9162328767123287, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.49234126984126986, "calib/std_conf": 0.09921895158050041, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7287564102564104, "calib/step_q_c_n": 780.0, "calib/step_q_gap": 0.005631670673077016, "calib/step_q_w": 0.7231247395833333, "calib/step_q_w_n": 1152.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2034.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 487.453125, "completions/mean_terminated_length": 489.36474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.0736, "grad_norm": 0.006305481307208538, "learning_rate": 3.638888888888889e-06, "loss": 0.0023, "num_tokens": 15248448.0, "reward": 0.90625, "reward_std": 0.2847837507724762, "rewards/accuracy_reward_step": 0.4140625, "rewards/format_reward_step": 0.984375, "step": 69 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4276155717761557, "calib/avg_num_step_conf": 7.65234375, "calib/ece": 0.44107569721115536, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.549800796812749, "calib/gap": -0.006924061979767027, "calib/mean_conf": 0.890796812749004, "calib/mu_c": 0.8870175438596491, "calib/mu_w": 0.8939416058394162, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4388446215139442, "calib/std_conf": 0.10985757961381803, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7327112258064516, "calib/step_q_c_n": 775.0, "calib/step_q_gap": 0.016503455536181377, "calib/step_q_w": 0.7162077702702703, "calib/step_q_w_n": 1184.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 463.96484375, "completions/mean_terminated_length": 463.96484375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.07466666666666667, "grad_norm": 0.00585549883544445, "learning_rate": 3.6111111111111115e-06, "loss": 0.0599, "num_tokens": 15474215.0, "reward": 0.935546875, "reward_std": 0.23678159713745117, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.98046875, "step": 70 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5046290343320046, "calib/avg_num_step_conf": 8.28125, "calib/ece": 0.48552941176470577, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.47058823529411764, "calib/gap": 0.010653208177960893, "calib/mean_conf": 0.8798039215686273, "calib/mu_c": 0.8862376237623765, "calib/mu_w": 0.8755844155844156, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.48462745098039206, "calib/std_conf": 0.11227225655728326, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7350923076923077, "calib/step_q_c_n": 715.0, "calib/step_q_gap": 0.0069001368738023094, "calib/step_q_w": 0.7281921708185054, "calib/step_q_w_n": 1405.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2064.0, "completions/max_terminated_length": 2064.0, "completions/mean_length": 489.91015625, "completions/mean_terminated_length": 489.91015625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.07573333333333333, "grad_norm": 0.006372661795467138, "learning_rate": 3.5833333333333335e-06, "loss": 0.0244, "num_tokens": 15704040.0, "reward": 0.890625, "reward_std": 0.3063148856163025, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.9921875, "step": 71 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.39437746062992124, "calib/avg_num_step_conf": 7.5078125, "calib/ece": 0.3997254901960786, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.48627450980392156, "calib/gap": -0.02038939468503942, "calib/mean_conf": 0.8896078431372548, "calib/mu_c": 0.8794531250000001, "calib/mu_w": 0.8998425196850395, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3936862745098041, "calib/std_conf": 0.08992722077616107, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7377215189873417, "calib/step_q_c_n": 869.0, "calib/step_q_gap": 0.028786666185822374, "calib/step_q_w": 0.7089348528015194, "calib/step_q_w_n": 1053.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2255.0, "completions/max_terminated_length": 2255.0, "completions/mean_length": 410.21484375, "completions/mean_terminated_length": 410.21484375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.0768, "grad_norm": 0.008393477648496628, "learning_rate": 3.555555555555556e-06, "loss": 0.0227, "num_tokens": 15913463.0, "reward": 0.998046875, "reward_std": 0.296690434217453, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.99609375, "step": 72 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4570565797838525, "calib/avg_num_step_conf": 6.86328125, "calib/ece": 0.3254545454545452, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.48616600790513836, "calib/gap": -0.0005804195804196555, "calib/mean_conf": 0.8906719367588934, "calib/mu_c": 0.8904195804195802, "calib/mu_w": 0.8909999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3254545454545452, "calib/std_conf": 0.0767930788514829, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7313693901035674, "calib/step_q_c_n": 869.0, "calib/step_q_gap": 0.03222524595942333, "calib/step_q_w": 0.699144144144144, "calib/step_q_w_n": 888.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2585.0, "completions/max_terminated_length": 2585.0, "completions/mean_length": 415.4921875, "completions/mean_terminated_length": 417.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.07786666666666667, "grad_norm": 0.007174800615757704, "learning_rate": 3.5277777777777784e-06, "loss": 0.0286, "num_tokens": 16126861.0, "reward": 1.052734375, "reward_std": 0.3073679804801941, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.98828125, "step": 73 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4718685031185031, "calib/avg_num_step_conf": 7.62890625, "calib/ece": 0.4692857142857143, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.42063492063492064, "calib/gap": -0.004851871101870908, "calib/mean_conf": 0.8819841269841271, "calib/mu_c": 0.8791346153846155, "calib/mu_w": 0.8839864864864864, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4692857142857143, "calib/std_conf": 0.0748333581638504, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.727112582781457, "calib/step_q_c_n": 755.0, "calib/step_q_gap": 0.01272193169631508, "calib/step_q_w": 0.7143906510851419, "calib/step_q_w_n": 1198.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2249.0, "completions/max_terminated_length": 2249.0, "completions/mean_length": 435.9296875, "completions/mean_terminated_length": 437.6392517089844, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.07893333333333333, "grad_norm": 0.008243421092629433, "learning_rate": 3.5e-06, "loss": 0.047, "num_tokens": 16342387.0, "reward": 0.89453125, "reward_std": 0.3572673499584198, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 0.9765625, "step": 74 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4807967313585291, "calib/avg_num_step_conf": 7.734375, "calib/ece": 0.24551181102362202, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4448818897637795, "calib/gap": -0.0036036772216547774, "calib/mean_conf": 0.8865354330708661, "calib/mu_c": 0.8852727272727272, "calib/mu_w": 0.888876404494382, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24122047244094486, "calib/std_conf": 0.06912134659800193, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7340513557929335, "calib/step_q_c_n": 1217.0, "calib/step_q_gap": 0.005912430498044907, "calib/step_q_w": 0.7281389252948886, "calib/step_q_w_n": 763.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2545.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 421.14453125, "completions/mean_terminated_length": 421.14453125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.08, "grad_norm": 0.006616497877985239, "learning_rate": 3.4722222222222224e-06, "loss": 0.0201, "num_tokens": 16554952.0, "reward": 1.140625, "reward_std": 0.2210792601108551, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.9921875, "step": 75 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49542763157894737, "calib/avg_num_step_conf": 7.7578125, "calib/ece": 0.2944444444444444, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.4523809523809524, "calib/gap": -0.004602631578947247, "calib/mean_conf": 0.8795238095238096, "calib/mu_c": 0.8776973684210526, "calib/mu_w": 0.8822999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2853968253968253, "calib/std_conf": 0.09486295162937285, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7230081227436824, "calib/step_q_c_n": 1108.0, "calib/step_q_gap": 0.016807667162816675, "calib/step_q_w": 0.7062004555808657, "calib/step_q_w_n": 878.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 445.609375, "completions/mean_terminated_length": 447.3569030761719, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.08106666666666666, "grad_norm": 0.008111460134387016, "learning_rate": 3.444444444444445e-06, "loss": 0.0276, "num_tokens": 16772084.0, "reward": 1.0859375, "reward_std": 0.34697067737579346, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.984375, "step": 76 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.45514851485148516, "calib/avg_num_step_conf": 7.65625, "calib/ece": 0.27804780876494023, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.41832669322709165, "calib/gap": -0.010414521452145498, "calib/mean_conf": 0.8756573705179284, "calib/mu_c": 0.8714666666666665, "calib/mu_w": 0.881881188118812, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27804780876494023, "calib/std_conf": 0.08224360215285961, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7159497487437188, "calib/step_q_c_n": 995.0, "calib/step_q_gap": 0.032706225427656666, "calib/step_q_w": 0.6832435233160621, "calib/step_q_w_n": 965.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 423.62109375, "completions/mean_terminated_length": 423.62109375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.08213333333333334, "grad_norm": 0.007898428477346897, "learning_rate": 3.416666666666667e-06, "loss": 0.0292, "num_tokens": 16985195.0, "reward": 1.07421875, "reward_std": 0.32221925258636475, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.9765625, "step": 77 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.546784715750233, "calib/avg_num_step_conf": 7.63671875, "calib/ece": 0.3200390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.41796875, "calib/gap": 0.017716682199440892, "calib/mean_conf": 0.8830078124999999, "calib/mu_c": 0.8906896551724138, "calib/mu_w": 0.8729729729729729, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31832031250000004, "calib/std_conf": 0.08451571563303976, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7521550946798918, "calib/step_q_c_n": 1109.0, "calib/step_q_gap": 0.02726502375790607, "calib/step_q_w": 0.7248900709219858, "calib/step_q_w_n": 846.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 451.578125, "completions/mean_terminated_length": 453.34906005859375, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.0832, "grad_norm": 0.007119826041162014, "learning_rate": 3.3888888888888893e-06, "loss": 0.0059, "num_tokens": 17208823.0, "reward": 1.06640625, "reward_std": 0.2874803841114044, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 1.0, "step": 78 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5097412967103161, "calib/avg_num_step_conf": 8.07421875, "calib/ece": 0.2846875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.4296875, "calib/gap": 0.0012328329607155242, "calib/mean_conf": 0.877578125, "calib/mu_c": 0.8780645161290322, "calib/mu_w": 0.8768316831683167, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27839843749999993, "calib/std_conf": 0.08033860075632619, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7329100887812753, "calib/step_q_c_n": 1239.0, "calib/step_q_gap": 0.02181105496485025, "calib/step_q_w": 0.711099033816425, "calib/step_q_w_n": 828.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 466.0078125, "completions/mean_terminated_length": 467.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.08426666666666667, "grad_norm": 0.007006760220974684, "learning_rate": 3.3611111111111117e-06, "loss": 0.0242, "num_tokens": 17434497.0, "reward": 1.10546875, "reward_std": 0.30984586477279663, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 1.0, "step": 79 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49135483870967744, "calib/avg_num_step_conf": 7.62890625, "calib/ece": 0.28654901960784307, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.45098039215686275, "calib/gap": 0.0019290322580646224, "calib/mean_conf": 0.8853725490196079, "calib/mu_c": 0.8861290322580645, "calib/mu_w": 0.8841999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28203921568627444, "calib/std_conf": 0.07883314739877266, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7555366284201236, "calib/step_q_c_n": 1133.0, "calib/step_q_gap": 0.03328784793231887, "calib/step_q_w": 0.7222487804878047, "calib/step_q_w_n": 820.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 422.24609375, "completions/mean_terminated_length": 423.9019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.08533333333333333, "grad_norm": 0.006706281565129757, "learning_rate": 3.3333333333333333e-06, "loss": 0.0001, "num_tokens": 17644752.0, "reward": 1.107421875, "reward_std": 0.2589503526687622, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.99609375, "step": 80 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5105316584189823, "calib/avg_num_step_conf": 7.81640625, "calib/ece": 0.33027667984189735, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.45454545454545453, "calib/gap": 0.007438142367719669, "calib/mean_conf": 0.8868774703557312, "calib/mu_c": 0.8901408450704226, "calib/mu_w": 0.8827027027027029, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3279446640316207, "calib/std_conf": 0.07486042880397843, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7595820271682341, "calib/step_q_c_n": 957.0, "calib/step_q_gap": 0.04866248693834896, "calib/step_q_w": 0.7109195402298851, "calib/step_q_w_n": 1044.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 473.12109375, "completions/mean_terminated_length": 473.12109375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.0864, "grad_norm": 0.006804213859140873, "learning_rate": 3.3055555555555558e-06, "loss": 0.0445, "num_tokens": 17872119.0, "reward": 1.048828125, "reward_std": 0.2571106553077698, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.98828125, "step": 81 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.53250062924742, "calib/avg_num_step_conf": 6.44921875, "calib/ece": 0.3499604743083004, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.35968379446640314, "calib/gap": 0.0020922476717843974, "calib/mean_conf": 0.8741501976284586, "calib/mu_c": 0.8751094890510948, "calib/mu_w": 0.8730172413793104, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.341304347826087, "calib/std_conf": 0.06940578658457372, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7785149752475247, "calib/step_q_c_n": 808.0, "calib/step_q_gap": 0.02590524808263739, "calib/step_q_w": 0.7526097271648873, "calib/step_q_w_n": 843.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 410.4453125, "completions/mean_terminated_length": 410.4453125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.08746666666666666, "grad_norm": 0.007754732389003038, "learning_rate": 3.277777777777778e-06, "loss": 0.0765, "num_tokens": 18082745.0, "reward": 1.02734375, "reward_std": 0.2921496033668518, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.984375, "step": 82 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4460887296094908, "calib/avg_num_step_conf": 7.21875, "calib/ece": 0.3640392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.4745098039215686, "calib/gap": -0.010126050420167787, "calib/mean_conf": 0.8973725490196078, "calib/mu_c": 0.8926470588235295, "calib/mu_w": 0.9027731092436972, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3640392156862745, "calib/std_conf": 0.06317543602565724, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7684606613454961, "calib/step_q_c_n": 877.0, "calib/step_q_gap": 0.002765501716247898, "calib/step_q_w": 0.7656951596292482, "calib/step_q_w_n": 971.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2606.0, "completions/max_terminated_length": 2606.0, "completions/mean_length": 495.95703125, "completions/mean_terminated_length": 495.95703125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.08853333333333334, "grad_norm": 0.005239543970674276, "learning_rate": 3.2500000000000002e-06, "loss": 0.017, "num_tokens": 18316974.0, "reward": 1.029296875, "reward_std": 0.19005656242370605, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.99609375, "step": 83 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4947182004598273, "calib/avg_num_step_conf": 6.48828125, "calib/ece": 0.3700393700787401, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.48031496062992124, "calib/gap": 0.0010520101907661727, "calib/mean_conf": 0.8897244094488189, "calib/mu_c": 0.8902255639097744, "calib/mu_w": 0.8891735537190082, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36807086614173223, "calib/std_conf": 0.06928432543249231, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7825955794504182, "calib/step_q_c_n": 837.0, "calib/step_q_gap": 0.008748492071777303, "calib/step_q_w": 0.7738470873786409, "calib/step_q_w_n": 824.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1358.0, "completions/max_terminated_length": 1358.0, "completions/mean_length": 406.80078125, "completions/mean_terminated_length": 408.3960876464844, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.0896, "grad_norm": 0.00738998968154192, "learning_rate": 3.2222222222222227e-06, "loss": -0.0329, "num_tokens": 18527035.0, "reward": 1.015625, "reward_std": 0.2715874910354614, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.9921875, "step": 84 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5067310134620269, "calib/avg_num_step_conf": 6.69140625, "calib/ece": 0.3924701195219124, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5059760956175299, "calib/gap": 0.0029857759715518073, "calib/mean_conf": 0.8984462151394423, "calib/mu_c": 0.8999212598425195, "calib/mu_w": 0.8969354838709677, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3924701195219124, "calib/std_conf": 0.06390025917885646, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.782581863979849, "calib/step_q_c_n": 794.0, "calib/step_q_gap": 0.020721145807922836, "calib/step_q_w": 0.7618607181719261, "calib/step_q_w_n": 919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2985.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 472.39453125, "completions/mean_terminated_length": 476.1141662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.09066666666666667, "grad_norm": 0.0065482864156365395, "learning_rate": 3.1944444444444443e-06, "loss": 0.012, "num_tokens": 18755792.0, "reward": 0.986328125, "reward_std": 0.27897346019744873, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.98046875, "step": 85 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47883027240231724, "calib/avg_num_step_conf": 6.6171875, "calib/ece": 0.3670196078431373, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.45098039215686275, "calib/gap": 0.005936768149882643, "calib/mean_conf": 0.8885882352941177, "calib/mu_c": 0.8914285714285713, "calib/mu_w": 0.8854918032786887, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3670196078431373, "calib/std_conf": 0.08120880288191216, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8187168141592921, "calib/step_q_c_n": 904.0, "calib/step_q_gap": 0.03227377618460858, "calib/step_q_w": 0.7864430379746835, "calib/step_q_w_n": 790.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1225.0, "completions/max_terminated_length": 1225.0, "completions/mean_length": 435.76953125, "completions/mean_terminated_length": 437.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.09173333333333333, "grad_norm": 0.0072197201661765575, "learning_rate": 3.1666666666666667e-06, "loss": 0.0072, "num_tokens": 18972861.0, "reward": 1.017578125, "reward_std": 0.22306768596172333, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.99609375, "step": 86 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4983501683501683, "calib/avg_num_step_conf": 5.84375, "calib/ece": 0.2562352941176471, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.41568627450980394, "calib/gap": -0.0013737373737373604, "calib/mean_conf": 0.8946666666666667, "calib/mu_c": 0.8941818181818183, "calib/mu_w": 0.8955555555555557, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2519215686274511, "calib/std_conf": 0.05499898988971424, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7833549083063647, "calib/step_q_c_n": 927.0, "calib/step_q_gap": -1.8579502257809466e-06, "calib/step_q_w": 0.7833567662565905, "calib/step_q_w_n": 569.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1564.0, "completions/max_terminated_length": 1564.0, "completions/mean_length": 395.4375, "completions/mean_terminated_length": 395.4375, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.0928, "grad_norm": 0.00887050200253725, "learning_rate": 3.138888888888889e-06, "loss": 0.0289, "num_tokens": 19179589.0, "reward": 1.142578125, "reward_std": 0.29103517532348633, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.99609375, "step": 87 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.45328185328185333, "calib/avg_num_step_conf": 6.07421875, "calib/ece": 0.33593625498007973, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.42231075697211157, "calib/gap": -0.0045019305019303735, "calib/mean_conf": 0.8937051792828685, "calib/mu_c": 0.8917142857142858, "calib/mu_w": 0.8962162162162162, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.33593625498007973, "calib/std_conf": 0.04975054037893203, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8097133027522935, "calib/step_q_c_n": 872.0, "calib/step_q_gap": 0.013877578008515856, "calib/step_q_w": 0.7958357247437776, "calib/step_q_w_n": 683.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2556.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 467.12890625, "completions/mean_terminated_length": 467.12890625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.09386666666666667, "grad_norm": 0.006494402419775724, "learning_rate": 3.1111111111111116e-06, "loss": 0.0539, "num_tokens": 19409022.0, "reward": 1.037109375, "reward_std": 0.25544431805610657, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.98046875, "step": 88 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4289703965741948, "calib/avg_num_step_conf": 5.62109375, "calib/ece": 0.3801574803149607, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4448818897637795, "calib/gap": -0.010303481660770775, "calib/mean_conf": 0.8959055118110236, "calib/mu_c": 0.8909160305343511, "calib/mu_w": 0.9012195121951219, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3801574803149607, "calib/std_conf": 0.0515179707306355, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8084137931034483, "calib/step_q_c_n": 725.0, "calib/step_q_gap": 0.033035641842944274, "calib/step_q_w": 0.775378151260504, "calib/step_q_w_n": 714.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1540.0, "completions/max_terminated_length": 1540.0, "completions/mean_length": 442.67578125, "completions/mean_terminated_length": 444.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.09493333333333333, "grad_norm": 0.007002872880548239, "learning_rate": 3.0833333333333336e-06, "loss": 0.0215, "num_tokens": 19631235.0, "reward": 1.0078125, "reward_std": 0.24216635525226593, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.9921875, "step": 89 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.41836536594835955, "calib/avg_num_step_conf": 5.66796875, "calib/ece": 0.30055118110236223, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.484251968503937, "calib/gap": -0.016638840354623596, "calib/mean_conf": 0.8904724409448818, "calib/mu_c": 0.8838562091503269, "calib/mu_w": 0.9004950495049505, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29433070866141736, "calib/std_conf": 0.08478187174114259, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7948888888888888, "calib/step_q_c_n": 810.0, "calib/step_q_gap": -0.0031003466805341606, "calib/step_q_w": 0.7979892355694229, "calib/step_q_w_n": 641.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2627.0, "completions/max_terminated_length": 2627.0, "completions/mean_length": 428.78515625, "completions/mean_terminated_length": 428.78515625, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.096, "grad_norm": 0.006482626777142286, "learning_rate": 3.055555555555556e-06, "loss": 0.0323, "num_tokens": 19844324.0, "reward": 1.09375, "reward_std": 0.2187202423810959, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.9921875, "step": 90 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.45087365591397854, "calib/avg_num_step_conf": 5.73046875, "calib/ece": 0.2769322709163346, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.4262948207171315, "calib/gap": -0.006774865591397949, "calib/mean_conf": 0.8944621513944223, "calib/mu_c": 0.8918709677419355, "calib/mu_w": 0.8986458333333335, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2769322709163346, "calib/std_conf": 0.0498284863261445, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.813194130925508, "calib/step_q_c_n": 886.0, "calib/step_q_gap": -0.011521875959173533, "calib/step_q_w": 0.8247160068846815, "calib/step_q_w_n": 581.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 440.40625, "completions/mean_terminated_length": 440.40625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.09706666666666666, "grad_norm": 0.007019788958132267, "learning_rate": 3.0277777777777776e-06, "loss": 0.0395, "num_tokens": 20064780.0, "reward": 1.095703125, "reward_std": 0.1930152028799057, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.98046875, "step": 91 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4741679566563467, "calib/avg_num_step_conf": 4.79296875, "calib/ece": 0.2944881889763781, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.35826771653543305, "calib/gap": -0.00839654282765745, "calib/mean_conf": 0.8837007874015748, "calib/mu_c": 0.880328947368421, "calib/mu_w": 0.8887254901960785, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28988188976377965, "calib/std_conf": 0.05577377180563677, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8058981233243968, "calib/step_q_c_n": 746.0, "calib/step_q_gap": -0.020068612642339168, "calib/step_q_w": 0.825966735966736, "calib/step_q_w_n": 481.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2160.0, "completions/max_terminated_length": 2160.0, "completions/mean_length": 392.453125, "completions/mean_terminated_length": 392.453125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.09813333333333334, "grad_norm": 0.008154223673045635, "learning_rate": 3e-06, "loss": -0.0069, "num_tokens": 20271968.0, "reward": 1.08984375, "reward_std": 0.2793588936328888, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.9921875, "step": 92 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4661604799400075, "calib/avg_num_step_conf": 5.87109375, "calib/ece": 0.3822134387351777, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3201581027667984, "calib/gap": -0.0037995250593676477, "calib/mean_conf": 0.8806324110671937, "calib/mu_c": 0.8787401574803146, "calib/mu_w": 0.8825396825396823, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.38043478260869545, "calib/std_conf": 0.05129542484083523, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8162957540263542, "calib/step_q_c_n": 683.0, "calib/step_q_gap": 0.005393315001963939, "calib/step_q_w": 0.8109024390243903, "calib/step_q_w_n": 820.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 461.25390625, "completions/mean_terminated_length": 461.25390625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.0992, "grad_norm": 0.007273159455507994, "learning_rate": 2.9722222222222225e-06, "loss": 0.0276, "num_tokens": 20495825.0, "reward": 0.986328125, "reward_std": 0.2531079649925232, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.98046875, "step": 93 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.46136277122192615, "calib/avg_num_step_conf": 4.94921875, "calib/ece": 0.32434782608695667, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.31620553359683795, "calib/gap": 0.0037400076132469984, "calib/mean_conf": 0.8856126482213439, "calib/mu_c": 0.8872535211267606, "calib/mu_w": 0.8835135135135136, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.32434782608695667, "calib/std_conf": 0.05223956822413295, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8169861111111112, "calib/step_q_c_n": 720.0, "calib/step_q_gap": 0.0012822719886249612, "calib/step_q_w": 0.8157038391224862, "calib/step_q_w_n": 547.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2696.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 408.25, "completions/mean_terminated_length": 409.85101318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.10026666666666667, "grad_norm": 0.007511666510254145, "learning_rate": 2.944444444444445e-06, "loss": 0.0165, "num_tokens": 20709017.0, "reward": 1.046875, "reward_std": 0.26490265130996704, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.984375, "step": 94 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4770461020461021, "calib/avg_num_step_conf": 5.0078125, "calib/ece": 0.26949019607843144, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.28627450980392155, "calib/gap": -0.004545454545454741, "calib/mean_conf": 0.8784313725490196, "calib/mu_c": 0.8766666666666666, "calib/mu_w": 0.8812121212121213, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2680784313725491, "calib/std_conf": 0.05112738988209085, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8057089552238805, "calib/step_q_c_n": 804.0, "calib/step_q_gap": 0.015708955223880494, "calib/step_q_w": 0.79, "calib/step_q_w_n": 478.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 412.42578125, "completions/mean_terminated_length": 414.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.10133333333333333, "grad_norm": 0.006043555215001106, "learning_rate": 2.916666666666667e-06, "loss": -0.0196, "num_tokens": 20920726.0, "reward": 1.10546875, "reward_std": 0.20898544788360596, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.9921875, "step": 95 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.41727759763146766, "calib/avg_num_step_conf": 5.1171875, "calib/ece": 0.20921568627450987, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2980392156862745, "calib/gap": -0.012831665021852623, "calib/mean_conf": 0.8823921568627451, "calib/mu_c": 0.8782658959537573, "calib/mu_w": 0.8910975609756099, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2065882352941177, "calib/std_conf": 0.047396565785815324, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8122093023255814, "calib/step_q_c_n": 860.0, "calib/step_q_gap": 0.006564857881136921, "calib/step_q_w": 0.8056444444444445, "calib/step_q_w_n": 450.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1030.0, "completions/max_terminated_length": 1030.0, "completions/mean_length": 386.23828125, "completions/mean_terminated_length": 387.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.1024, "grad_norm": 0.0067080045118927956, "learning_rate": 2.888888888888889e-06, "loss": 0.0224, "num_tokens": 21125419.0, "reward": 1.177734375, "reward_std": 0.19021794199943542, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.99609375, "step": 96 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5540832715591797, "calib/avg_num_step_conf": 5.14453125, "calib/ece": 0.3196874999999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.2265625, "calib/gap": 0.00978502594514441, "calib/mean_conf": 0.874375, "calib/mu_c": 0.878732394366197, "calib/mu_w": 0.8689473684210526, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3196874999999999, "calib/std_conf": 0.04965112662367291, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8144895104895103, "calib/step_q_c_n": 715.0, "calib/step_q_gap": 0.008011105173895694, "calib/step_q_w": 0.8064784053156147, "calib/step_q_w_n": 602.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 922.0, "completions/max_terminated_length": 922.0, "completions/mean_length": 406.0859375, "completions/mean_terminated_length": 407.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.10346666666666667, "grad_norm": 0.007084716111421585, "learning_rate": 2.861111111111111e-06, "loss": 0.016, "num_tokens": 21334449.0, "reward": 1.0546875, "reward_std": 0.2218756079673767, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 1.0, "step": 97 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4697322081201408, "calib/avg_num_step_conf": 4.5703125, "calib/ece": 0.2829600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.272, "calib/gap": -0.0010804704631537376, "calib/mean_conf": 0.8789600000000001, "calib/mu_c": 0.8785234899328858, "calib/mu_w": 0.8796039603960395, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2829600000000001, "calib/std_conf": 0.04746913102217061, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.808751950078003, "calib/step_q_c_n": 641.0, "calib/step_q_gap": -0.0006620385798418171, "calib/step_q_w": 0.8094139886578449, "calib/step_q_w_n": 529.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2911.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 439.12890625, "completions/mean_terminated_length": 440.85101318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.10453333333333334, "grad_norm": 0.008098619990050793, "learning_rate": 2.8333333333333335e-06, "loss": 0.0601, "num_tokens": 21553050.0, "reward": 1.0703125, "reward_std": 0.2985646426677704, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.9765625, "step": 98 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.44878862120593443, "calib/avg_num_step_conf": 5.13671875, "calib/ece": 0.5012749003984065, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2788844621513944, "calib/gap": -0.0076310058527289115, "calib/mean_conf": 0.871792828685259, "calib/mu_c": 0.866989247311828, "calib/mu_w": 0.8746202531645569, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.5012749003984065, "calib/std_conf": 0.05820328529122533, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8003992015968064, "calib/step_q_c_n": 501.0, "calib/step_q_gap": 0.00536234655995127, "calib/step_q_w": 0.7950368550368552, "calib/step_q_w_n": 814.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2612.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 507.890625, "completions/mean_terminated_length": 507.890625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.1056, "grad_norm": 0.008391114883124828, "learning_rate": 2.805555555555556e-06, "loss": 0.0662, "num_tokens": 21788870.0, "reward": 0.8515625, "reward_std": 0.3123432695865631, "rewards/accuracy_reward_step": 0.36328125, "rewards/format_reward_step": 0.9765625, "step": 99 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48122427983539096, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.2957142857142858, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2261904761904762, "calib/gap": 0.0043981481481480955, "calib/mean_conf": 0.8671428571428571, "calib/mu_c": 0.8690277777777777, "calib/mu_w": 0.8646296296296296, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2957142857142858, "calib/std_conf": 0.06583839212458807, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.806955345060893, "calib/step_q_c_n": 739.0, "calib/step_q_gap": 0.005649038754586666, "calib/step_q_w": 0.8013063063063063, "calib/step_q_w_n": 444.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 455.484375, "completions/mean_terminated_length": 459.07086181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.10666666666666667, "grad_norm": 0.007559601683169603, "learning_rate": 2.7777777777777783e-06, "loss": 0.0124, "num_tokens": 22012882.0, "reward": 1.052734375, "reward_std": 0.26982903480529785, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.98046875, "step": 100 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5526965562053282, "calib/avg_num_step_conf": 4.859375, "calib/ece": 0.41590361445783147, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.24497991967871485, "calib/gap": 0.010423001949317778, "calib/mean_conf": 0.8737349397590362, "calib/mu_c": 0.8793859649122807, "calib/mu_w": 0.8689629629629629, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.41590361445783147, "calib/std_conf": 0.05195095408766677, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.8095379537953795, "calib/step_q_c_n": 606.0, "calib/step_q_gap": 0.01179500708691561, "calib/step_q_w": 0.7977429467084639, "calib/step_q_w_n": 638.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2611.0, "completions/max_terminated_length": 2611.0, "completions/mean_length": 503.3828125, "completions/mean_terminated_length": 503.3828125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.10773333333333333, "grad_norm": 0.0077116601169109344, "learning_rate": 2.7500000000000004e-06, "loss": 0.0604, "num_tokens": 22248740.0, "reward": 0.9296875, "reward_std": 0.32882392406463623, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.96875, "step": 101 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48478319418260163, "calib/avg_num_step_conf": 4.6171875, "calib/ece": 0.2480555555555557, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.24206349206349206, "calib/gap": -0.005557500673309734, "calib/mean_conf": 0.8725793650793652, "calib/mu_c": 0.8705063291139242, "calib/mu_w": 0.876063829787234, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.24682539682539695, "calib/std_conf": 0.05092101776664059, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8058095238095238, "calib/step_q_c_n": 735.0, "calib/step_q_gap": 0.0016931927133267966, "calib/step_q_w": 0.804116331096197, "calib/step_q_w_n": 447.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2261.0, "completions/max_terminated_length": 2261.0, "completions/mean_length": 381.67578125, "completions/mean_terminated_length": 386.20159912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.1088, "grad_norm": 0.008189848624169827, "learning_rate": 2.7222222222222224e-06, "loss": -0.0209, "num_tokens": 22453145.0, "reward": 1.1015625, "reward_std": 0.21460118889808655, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.96875, "step": 102 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5132780349794239, "calib/avg_num_step_conf": 4.6328125, "calib/ece": 0.30587301587301574, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.23015873015873015, "calib/gap": -0.001481481481481306, "calib/mean_conf": 0.8773015873015872, "calib/mu_c": 0.8766666666666666, "calib/mu_w": 0.8781481481481479, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.30587301587301574, "calib/std_conf": 0.04898027945263534, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8151098096632504, "calib/step_q_c_n": 683.0, "calib/step_q_gap": 0.013976608868021767, "calib/step_q_w": 0.8011332007952287, "calib/step_q_w_n": 503.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2695.0, "completions/max_terminated_length": 2695.0, "completions/mean_length": 451.765625, "completions/mean_terminated_length": 453.53729248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.10986666666666667, "grad_norm": 0.0077002933248877525, "learning_rate": 2.6944444444444444e-06, "loss": 0.013, "num_tokens": 22673349.0, "reward": 1.0546875, "reward_std": 0.19874930381774902, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.984375, "step": 103 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4478870509175498, "calib/avg_num_step_conf": 4.921875, "calib/ece": 0.4536507936507937, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.23015873015873015, "calib/gap": -0.0038123546135953035, "calib/mean_conf": 0.8707936507936508, "calib/mu_c": 0.8685849056603773, "calib/mu_w": 0.8723972602739726, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4519047619047619, "calib/std_conf": 0.07719901280933066, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8096183206106871, "calib/step_q_c_n": 524.0, "calib/step_q_gap": 0.020759624958513245, "calib/step_q_w": 0.7888586956521738, "calib/step_q_w_n": 736.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2125.0, "completions/max_terminated_length": 2125.0, "completions/mean_length": 457.78125, "completions/mean_terminated_length": 459.5765075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.11093333333333333, "grad_norm": 0.008258271962404251, "learning_rate": 2.666666666666667e-06, "loss": 0.0147, "num_tokens": 22897221.0, "reward": 0.91015625, "reward_std": 0.312614381313324, "rewards/accuracy_reward_step": 0.41796875, "rewards/format_reward_step": 0.984375, "step": 104 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5020491803278688, "calib/avg_num_step_conf": 4.62890625, "calib/ece": 0.3603200000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.268, "calib/gap": 0.0016495901639342891, "calib/mean_conf": 0.87232, "calib/mu_c": 0.8731249999999999, "calib/mu_w": 0.8714754098360656, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.3603200000000001, "calib/std_conf": 0.06705980614347166, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.7889141856392294, "calib/step_q_c_n": 571.0, "calib/step_q_gap": 0.011187801274408549, "calib/step_q_w": 0.7777263843648209, "calib/step_q_w_n": 614.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2944.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 473.08984375, "completions/mean_terminated_length": 476.8149719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.112, "grad_norm": 0.0080982381477952, "learning_rate": 2.6388888888888893e-06, "loss": -0.0084, "num_tokens": 23124092.0, "reward": 0.98046875, "reward_std": 0.3512883186340332, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.9609375, "step": 105 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4590505998956703, "calib/avg_num_step_conf": 4.87890625, "calib/ece": 0.31031999999999993, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.304, "calib/gap": -0.005239958268127176, "calib/mean_conf": 0.87832, "calib/mu_c": 0.8760563380281691, "calib/mu_w": 0.8812962962962962, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.31031999999999993, "calib/std_conf": 0.05275962092358132, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7902427921092564, "calib/step_q_c_n": 659.0, "calib/step_q_gap": 0.010836012448239396, "calib/step_q_w": 0.779406779661017, "calib/step_q_w_n": 590.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2299.0, "completions/max_terminated_length": 2299.0, "completions/mean_length": 443.265625, "completions/mean_terminated_length": 443.265625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.11306666666666666, "grad_norm": 0.006772839929908514, "learning_rate": 2.6111111111111113e-06, "loss": 0.1007, "num_tokens": 23342152.0, "reward": 1.041015625, "reward_std": 0.2168501913547516, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.97265625, "step": 106 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5359669811320754, "calib/avg_num_step_conf": 5.42578125, "calib/ece": 0.2570196078431372, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.2784313725490196, "calib/gap": 0.007295597484276772, "calib/mean_conf": 0.8778823529411766, "calib/mu_c": 0.8806289308176102, "calib/mu_w": 0.8733333333333334, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.25568627450980386, "calib/std_conf": 0.05434300902377219, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7773294117647059, "calib/step_q_c_n": 850.0, "calib/step_q_gap": -0.009201200480192018, "calib/step_q_w": 0.786530612244898, "calib/step_q_w_n": 539.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1286.0, "completions/max_terminated_length": 1286.0, "completions/mean_length": 432.13671875, "completions/mean_terminated_length": 433.8313903808594, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.11413333333333334, "grad_norm": 0.006940098479390144, "learning_rate": 2.5833333333333337e-06, "loss": 0.0097, "num_tokens": 23557395.0, "reward": 1.1171875, "reward_std": 0.2205488383769989, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.9921875, "step": 107 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5335861321776815, "calib/avg_num_step_conf": 5.7109375, "calib/ece": 0.1599604743083005, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.30039525691699603, "calib/gap": 0.00709332920600525, "calib/mean_conf": 0.8793280632411067, "calib/mu_c": 0.8813186813186812, "calib/mu_w": 0.8742253521126759, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1599604743083005, "calib/std_conf": 0.04983711693419079, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7930469530469529, "calib/step_q_c_n": 1001.0, "calib/step_q_gap": 0.009077321810510464, "calib/step_q_w": 0.7839696312364425, "calib/step_q_w_n": 461.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2736.0, "completions/max_terminated_length": 2736.0, "completions/mean_length": 477.35546875, "completions/mean_terminated_length": 477.35546875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.1152, "grad_norm": 0.006124243140220642, "learning_rate": 2.5555555555555557e-06, "loss": 0.0207, "num_tokens": 23782830.0, "reward": 1.205078125, "reward_std": 0.20648230612277985, "rewards/accuracy_reward_step": 0.7109375, "rewards/format_reward_step": 0.98828125, "step": 108 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.501492252984506, "calib/avg_num_step_conf": 6.2265625, "calib/ece": 0.3770916334661354, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3466135458167331, "calib/gap": -0.0004718059436117361, "calib/mean_conf": 0.8830677290836654, "calib/mu_c": 0.8828346456692916, "calib/mu_w": 0.8833064516129033, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3770916334661354, "calib/std_conf": 0.0537271344343267, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7836856010568032, "calib/step_q_c_n": 757.0, "calib/step_q_gap": 0.026465768320841243, "calib/step_q_w": 0.7572198327359619, "calib/step_q_w_n": 837.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 494.6328125, "completions/mean_terminated_length": 496.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.11626666666666667, "grad_norm": 0.0049357945099473, "learning_rate": 2.5277777777777778e-06, "loss": 0.056, "num_tokens": 24014056.0, "reward": 0.986328125, "reward_std": 0.14231275022029877, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.98046875, "step": 109 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4638297872340426, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.3464453125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.3203125, "calib/gap": -0.013190872648782048, "calib/mean_conf": 0.8800390625000001, "calib/mu_c": 0.874113475177305, "calib/mu_w": 0.887304347826087, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3378515625, "calib/std_conf": 0.06806188617075709, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7839516129032257, "calib/step_q_c_n": 620.0, "calib/step_q_gap": 0.0028177839069432054, "calib/step_q_w": 0.7811338289962825, "calib/step_q_w_n": 538.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1209.0, "completions/max_terminated_length": 1209.0, "completions/mean_length": 416.0546875, "completions/mean_terminated_length": 417.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.11733333333333333, "grad_norm": 0.007653168402612209, "learning_rate": 2.5e-06, "loss": 0.0085, "num_tokens": 24225486.0, "reward": 1.05078125, "reward_std": 0.25460559129714966, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 1.0, "step": 110 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5320876042528808, "calib/avg_num_step_conf": 5.453125, "calib/ece": 0.3301587301587301, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.32936507936507936, "calib/gap": 0.007657732221302727, "calib/mean_conf": 0.8817460317460318, "calib/mu_c": 0.885179856115108, "calib/mu_w": 0.8775221238938052, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3301587301587301, "calib/std_conf": 0.0544536990607233, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7849710982658958, "calib/step_q_c_n": 692.0, "calib/step_q_gap": 0.016732461902259632, "calib/step_q_w": 0.7682386363636362, "calib/step_q_w_n": 704.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2085.0, "completions/max_terminated_length": 2085.0, "completions/mean_length": 484.6015625, "completions/mean_terminated_length": 486.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.1184, "grad_norm": 0.006669084075838327, "learning_rate": 2.4722222222222226e-06, "loss": 0.0063, "num_tokens": 24456952.0, "reward": 1.029296875, "reward_std": 0.2437652051448822, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.97265625, "step": 111 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5491143317230274, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.35539999999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.344, "calib/gap": 0.012634460547503945, "calib/mean_conf": 0.8840399999999999, "calib/mu_c": 0.8898518518518518, "calib/mu_w": 0.8772173913043478, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.34972, "calib/std_conf": 0.07832801797568989, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7576998597475456, "calib/step_q_c_n": 713.0, "calib/step_q_gap": 0.012270306482940696, "calib/step_q_w": 0.7454295532646049, "calib/step_q_w_n": 582.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2601.0, "completions/max_terminated_length": 2601.0, "completions/mean_length": 502.12890625, "completions/mean_terminated_length": 510.0992431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.11946666666666667, "grad_norm": 0.0062360563315451145, "learning_rate": 2.4444444444444447e-06, "loss": 0.0064, "num_tokens": 24693417.0, "reward": 1.013671875, "reward_std": 0.22822797298431396, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.97265625, "step": 112 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5737267080745341, "calib/avg_num_step_conf": 5.56640625, "calib/ece": 0.33831372549019617, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.30980392156862746, "calib/gap": 0.008329192546583708, "calib/mean_conf": 0.881529411764706, "calib/mu_c": 0.8852857142857142, "calib/mu_w": 0.8769565217391305, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3354117647058824, "calib/std_conf": 0.054639766076630326, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.779700272479564, "calib/step_q_c_n": 734.0, "calib/step_q_gap": 0.03318797146653929, "calib/step_q_w": 0.7465123010130247, "calib/step_q_w_n": 691.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1569.0, "completions/max_terminated_length": 1569.0, "completions/mean_length": 428.05078125, "completions/mean_terminated_length": 429.72943115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.12053333333333334, "grad_norm": 0.007974677719175816, "learning_rate": 2.4166666666666667e-06, "loss": 0.0155, "num_tokens": 24908198.0, "reward": 1.044921875, "reward_std": 0.26487648487091064, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.99609375, "step": 113 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44065556436690456, "calib/avg_num_step_conf": 6.0546875, "calib/ece": 0.2709881422924902, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.37549407114624506, "calib/gap": -0.008441712926249068, "calib/mean_conf": 0.8859288537549408, "calib/mu_c": 0.8826923076923077, "calib/mu_w": 0.8911340206185567, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2701581027667985, "calib/std_conf": 0.053468867945726924, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7469487750556792, "calib/step_q_c_n": 898.0, "calib/step_q_gap": 0.00016963395138480664, "calib/step_q_w": 0.7467791411042944, "calib/step_q_w_n": 652.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 483.28125, "completions/mean_terminated_length": 483.28125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.1216, "grad_norm": 0.007670039776712656, "learning_rate": 2.388888888888889e-06, "loss": 0.0323, "num_tokens": 25136942.0, "reward": 1.103515625, "reward_std": 0.3302323520183563, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.98828125, "step": 114 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5204283887468031, "calib/avg_num_step_conf": 5.76171875, "calib/ece": 0.3450199203187251, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3944223107569721, "calib/gap": 0.0028574168797955712, "calib/mean_conf": 0.8868525896414343, "calib/mu_c": 0.8881617647058824, "calib/mu_w": 0.8853043478260868, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3450199203187251, "calib/std_conf": 0.05376700308538497, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7592137592137592, "calib/step_q_c_n": 814.0, "calib/step_q_gap": -0.013388358789266475, "calib/step_q_w": 0.7726021180030257, "calib/step_q_w_n": 661.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 452.8203125, "completions/mean_terminated_length": 454.5960998535156, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.12266666666666666, "grad_norm": 0.006985951215028763, "learning_rate": 2.361111111111111e-06, "loss": 0.0417, "num_tokens": 25358128.0, "reward": 1.017578125, "reward_std": 0.2832874059677124, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.97265625, "step": 115 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.3871982537236774, "calib/avg_num_step_conf": 6.40234375, "calib/ece": 0.3520800000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.34, "calib/gap": -0.019911402157165026, "calib/mean_conf": 0.88008, "calib/mu_c": 0.8706818181818182, "calib/mu_w": 0.8905932203389832, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3520800000000001, "calib/std_conf": 0.052786301253260774, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7537380627557981, "calib/step_q_c_n": 733.0, "calib/step_q_gap": 0.022038283506349954, "calib/step_q_w": 0.7316997792494482, "calib/step_q_w_n": 906.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3024.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 497.71484375, "completions/mean_terminated_length": 499.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.12373333333333333, "grad_norm": 0.005950558930635452, "learning_rate": 2.3333333333333336e-06, "loss": 0.0413, "num_tokens": 25590063.0, "reward": 1.0, "reward_std": 0.24267561733722687, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.96875, "step": 116 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5050012503125781, "calib/avg_num_step_conf": 5.98046875, "calib/ece": 0.39525691699604737, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3715415019762846, "calib/gap": 0.005590147536883916, "calib/mean_conf": 0.8853754940711462, "calib/mu_c": 0.8882258064516128, "calib/mu_w": 0.8826356589147288, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39525691699604737, "calib/std_conf": 0.057183199392304755, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.765679758308157, "calib/step_q_c_n": 662.0, "calib/step_q_gap": 0.05223902182944584, "calib/step_q_w": 0.7134407364787112, "calib/step_q_w_n": 869.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2601.0, "completions/max_terminated_length": 2601.0, "completions/mean_length": 485.38671875, "completions/mean_terminated_length": 485.38671875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.1248, "grad_norm": 0.005563353653997183, "learning_rate": 2.305555555555556e-06, "loss": 0.0457, "num_tokens": 25820922.0, "reward": 0.974609375, "reward_std": 0.21814069151878357, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.98046875, "step": 117 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4824248120300752, "calib/avg_num_step_conf": 6.9453125, "calib/ece": 0.32771653543307083, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.37401574803149606, "calib/gap": 0.0061165413533834645, "calib/mean_conf": 0.8788976377952756, "calib/mu_c": 0.8816428571428572, "calib/mu_w": 0.8755263157894737, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32771653543307083, "calib/std_conf": 0.08366344601523654, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7382222222222223, "calib/step_q_c_n": 855.0, "calib/step_q_gap": 0.03433273143132298, "calib/step_q_w": 0.7038894907908994, "calib/step_q_w_n": 923.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2218.0, "completions/max_terminated_length": 2218.0, "completions/mean_length": 532.171875, "completions/mean_terminated_length": 534.2588500976562, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.12586666666666665, "grad_norm": 0.004880652762949467, "learning_rate": 2.277777777777778e-06, "loss": 0.0038, "num_tokens": 26061166.0, "reward": 1.04296875, "reward_std": 0.18309026956558228, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.9921875, "step": 118 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5308506518652382, "calib/avg_num_step_conf": 6.3359375, "calib/ece": 0.37538152610441766, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.42168674698795183, "calib/gap": 0.007094359106751047, "calib/mean_conf": 0.885421686746988, "calib/mu_c": 0.8888976377952758, "calib/mu_w": 0.8818032786885247, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37538152610441766, "calib/std_conf": 0.057294445678343194, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7466408268733848, "calib/step_q_c_n": 774.0, "calib/step_q_gap": 0.035933279703573406, "calib/step_q_w": 0.7107075471698114, "calib/step_q_w_n": 848.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 557.765625, "completions/mean_terminated_length": 562.157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.12693333333333334, "grad_norm": 0.005895211827009916, "learning_rate": 2.25e-06, "loss": 0.0334, "num_tokens": 26309018.0, "reward": 0.982421875, "reward_std": 0.28925490379333496, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.97265625, "step": 119 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5175073373831138, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.2411507936507938, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3253968253968254, "calib/gap": 0.0009698996655516101, "calib/mean_conf": 0.8728174603174603, "calib/mu_c": 0.8731677018633539, "calib/mu_w": 0.8721978021978023, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2375396825396827, "calib/std_conf": 0.08035387020574726, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7378947368421053, "calib/step_q_c_n": 912.0, "calib/step_q_gap": -0.014609275998986204, "calib/step_q_w": 0.7525040128410915, "calib/step_q_w_n": 623.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2748.0, "completions/max_terminated_length": 2748.0, "completions/mean_length": 460.84765625, "completions/mean_terminated_length": 460.84765625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.128, "grad_norm": 0.006268102675676346, "learning_rate": 2.222222222222222e-06, "loss": 0.0838, "num_tokens": 26533683.0, "reward": 1.12109375, "reward_std": 0.24319830536842346, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.984375, "step": 120 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5452300575143786, "calib/avg_num_step_conf": 6.93359375, "calib/ece": 0.3722924901185771, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.35177865612648224, "calib/gap": 0.01538322080520138, "calib/mean_conf": 0.8769565217391304, "calib/mu_c": 0.8844961240310077, "calib/mu_w": 0.8691129032258064, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3696837944664032, "calib/std_conf": 0.07015951677025219, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7287597911227154, "calib/step_q_c_n": 766.0, "calib/step_q_gap": 0.00698575742598595, "calib/step_q_w": 0.7217740336967294, "calib/step_q_w_n": 1009.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2467.0, "completions/max_terminated_length": 2467.0, "completions/mean_length": 513.14453125, "completions/mean_terminated_length": 515.1569213867188, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.12906666666666666, "grad_norm": 0.005872685927897692, "learning_rate": 2.1944444444444445e-06, "loss": 0.042, "num_tokens": 26770104.0, "reward": 0.998046875, "reward_std": 0.3006698489189148, "rewards/accuracy_reward_step": 0.50390625, "rewards/format_reward_step": 0.98828125, "step": 121 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5214, "calib/avg_num_step_conf": 6.5546875, "calib/ece": 0.2801600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.316, "calib/gap": 0.002466666666666728, "calib/mean_conf": 0.87168, "calib/mu_c": 0.8726666666666667, "calib/mu_w": 0.8702, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27592000000000005, "calib/std_conf": 0.0606133450652577, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7503354978354978, "calib/step_q_c_n": 924.0, "calib/step_q_gap": 0.04221878696016634, "calib/step_q_w": 0.7081167108753315, "calib/step_q_w_n": 754.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 462.7890625, "completions/mean_terminated_length": 470.13494873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.13013333333333332, "grad_norm": 0.0056983535178005695, "learning_rate": 2.166666666666667e-06, "loss": 0.0162, "num_tokens": 26995922.0, "reward": 1.07421875, "reward_std": 0.2015603482723236, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.9765625, "step": 122 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4684514831573655, "calib/avg_num_step_conf": 6.671875, "calib/ece": 0.33980237154150195, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.31620553359683795, "calib/gap": -0.008157365510306547, "calib/mean_conf": 0.8734782608695653, "calib/mu_c": 0.8697058823529412, "calib/mu_w": 0.8778632478632478, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.33786561264822135, "calib/std_conf": 0.06231816649450058, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7276886792452829, "calib/step_q_c_n": 848.0, "calib/step_q_gap": 0.031502632733654945, "calib/step_q_w": 0.696186046511628, "calib/step_q_w_n": 860.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 544.2890625, "completions/mean_terminated_length": 546.423583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.1312, "grad_norm": 0.006151233799755573, "learning_rate": 2.138888888888889e-06, "loss": -0.0215, "num_tokens": 27240548.0, "reward": 1.0234375, "reward_std": 0.29736945033073425, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.984375, "step": 123 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5681663258350375, "calib/avg_num_step_conf": 6.296875, "calib/ece": 0.23648221343873532, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2964426877470356, "calib/gap": 0.011232447171097615, "calib/mean_conf": 0.8743478260869565, "calib/mu_c": 0.8783435582822087, "calib/mu_w": 0.867111111111111, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23328063241106733, "calib/std_conf": 0.05411221656373088, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7459311740890688, "calib/step_q_c_n": 988.0, "calib/step_q_gap": -0.0011521592442643724, "calib/step_q_w": 0.7470833333333332, "calib/step_q_w_n": 624.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2217.0, "completions/max_terminated_length": 2217.0, "completions/mean_length": 493.08984375, "completions/mean_terminated_length": 493.08984375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.13226666666666667, "grad_norm": 0.006515982560813427, "learning_rate": 2.1111111111111114e-06, "loss": 0.0049, "num_tokens": 27473595.0, "reward": 1.12890625, "reward_std": 0.26251235604286194, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.984375, "step": 124 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4942148760330579, "calib/avg_num_step_conf": 6.15625, "calib/ece": 0.3582470119521912, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.26693227091633465, "calib/gap": -0.00013477431659247863, "calib/mean_conf": 0.8729880478087649, "calib/mu_c": 0.8729230769230768, "calib/mu_w": 0.8730578512396693, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3566533864541832, "calib/std_conf": 0.05457766001472556, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7396488764044944, "calib/step_q_c_n": 712.0, "calib/step_q_gap": 0.020678968997086877, "calib/step_q_w": 0.7189699074074075, "calib/step_q_w_n": 864.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2858.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 502.80078125, "completions/mean_terminated_length": 504.7725830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.13333333333333333, "grad_norm": 0.00571054220199585, "learning_rate": 2.0833333333333334e-06, "loss": 0.0216, "num_tokens": 27707120.0, "reward": 0.99609375, "reward_std": 0.3074162006378174, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 0.9765625, "step": 125 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4624029551715502, "calib/avg_num_step_conf": 6.62109375, "calib/ece": 0.35237154150197625, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.33992094861660077, "calib/gap": -0.004318181818181777, "calib/mean_conf": 0.8741106719367588, "calib/mu_c": 0.8720454545454546, "calib/mu_w": 0.8763636363636363, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35237154150197625, "calib/std_conf": 0.06917711990051564, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7486817102137767, "calib/step_q_c_n": 842.0, "calib/step_q_gap": 0.0361529880566841, "calib/step_q_w": 0.7125287221570926, "calib/step_q_w_n": 853.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1862.0, "completions/max_terminated_length": 1862.0, "completions/mean_length": 481.08203125, "completions/mean_terminated_length": 484.8700866699219, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.1344, "grad_norm": 0.0058242930099368095, "learning_rate": 2.0555555555555555e-06, "loss": -0.0112, "num_tokens": 27935741.0, "reward": 1.0078125, "reward_std": 0.25029534101486206, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.984375, "step": 126 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48636220472440944, "calib/avg_num_step_conf": 6.2421875, "calib/ece": 0.3589682539682541, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.25396825396825395, "calib/gap": 0.005033070866141798, "calib/mean_conf": 0.862936507936508, "calib/mu_c": 0.8654330708661417, "calib/mu_w": 0.8603999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3589682539682541, "calib/std_conf": 0.08610225980403213, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7256891891891892, "calib/step_q_c_n": 740.0, "calib/step_q_gap": 0.007122755622755661, "calib/step_q_w": 0.7185664335664336, "calib/step_q_w_n": 858.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2770.0, "completions/max_terminated_length": 2770.0, "completions/mean_length": 481.63671875, "completions/mean_terminated_length": 481.63671875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.13546666666666668, "grad_norm": 0.006309483200311661, "learning_rate": 2.027777777777778e-06, "loss": 0.0508, "num_tokens": 28162712.0, "reward": 0.986328125, "reward_std": 0.2884799540042877, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.98046875, "step": 127 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5073977936404932, "calib/avg_num_step_conf": 5.19140625, "calib/ece": 0.32823293172690754, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.28112449799196787, "calib/gap": 0.008704088254380404, "calib/mean_conf": 0.8627710843373495, "calib/mu_c": 0.8667910447761195, "calib/mu_w": 0.858086956521739, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3264257028112449, "calib/std_conf": 0.08950080100620286, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7451987281399046, "calib/step_q_c_n": 629.0, "calib/step_q_gap": 0.04137015671133315, "calib/step_q_w": 0.7038285714285715, "calib/step_q_w_n": 700.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2523.0, "completions/max_terminated_length": 2523.0, "completions/mean_length": 489.09765625, "completions/mean_terminated_length": 489.09765625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.13653333333333334, "grad_norm": 0.0059307715855538845, "learning_rate": 2.0000000000000003e-06, "loss": 0.0333, "num_tokens": 28394585.0, "reward": 1.009765625, "reward_std": 0.26397642493247986, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.97265625, "step": 128 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4389801375095493, "calib/avg_num_step_conf": 5.80859375, "calib/ece": 0.27578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.21875, "calib/gap": -0.013776419658772743, "calib/mean_conf": 0.86328125, "calib/mu_c": 0.8577922077922076, "calib/mu_w": 0.8715686274509803, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26875000000000004, "calib/std_conf": 0.05657005301780705, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7613248847926267, "calib/step_q_c_n": 868.0, "calib/step_q_gap": 0.009208568152885288, "calib/step_q_w": 0.7521163166397414, "calib/step_q_w_n": 619.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1591.0, "completions/max_terminated_length": 1591.0, "completions/mean_length": 422.0625, "completions/mean_terminated_length": 423.7176818847656, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.1376, "grad_norm": 0.00600614957511425, "learning_rate": 1.9722222222222224e-06, "loss": 0.0002, "num_tokens": 28605017.0, "reward": 1.1015625, "reward_std": 0.22764956951141357, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 1.0, "step": 129 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.544435099523409, "calib/avg_num_step_conf": 4.87890625, "calib/ece": 0.221394422310757, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2549800796812749, "calib/gap": 0.006085646201289596, "calib/mean_conf": 0.8747808764940239, "calib/mu_c": 0.8768902439024391, "calib/mu_w": 0.8708045977011495, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.221394422310757, "calib/std_conf": 0.04967074652737102, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7580371352785145, "calib/step_q_c_n": 754.0, "calib/step_q_gap": 0.012420973662352841, "calib/step_q_w": 0.7456161616161616, "calib/step_q_w_n": 495.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 445.734375, "completions/mean_terminated_length": 445.734375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.13866666666666666, "grad_norm": 0.00482236547395587, "learning_rate": 1.944444444444445e-06, "loss": 0.0876, "num_tokens": 28824413.0, "reward": 1.130859375, "reward_std": 0.17365878820419312, "rewards/accuracy_reward_step": 0.640625, "rewards/format_reward_step": 0.98046875, "step": 130 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4551726350917726, "calib/avg_num_step_conf": 5.05078125, "calib/ece": 0.4493280632411065, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.25691699604743085, "calib/gap": -0.008606725709151664, "calib/mean_conf": 0.8658498023715415, "calib/mu_c": 0.8608490566037735, "calib/mu_w": 0.8694557823129252, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4481027667984188, "calib/std_conf": 0.05317367667620221, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7563003663003663, "calib/step_q_c_n": 546.0, "calib/step_q_gap": 0.012913485443606043, "calib/step_q_w": 0.7433868808567603, "calib/step_q_w_n": 747.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2368.0, "completions/max_terminated_length": 2368.0, "completions/mean_length": 425.23828125, "completions/mean_terminated_length": 425.23828125, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.13973333333333332, "grad_norm": 0.00542412931099534, "learning_rate": 1.916666666666667e-06, "loss": 0.0357, "num_tokens": 29039482.0, "reward": 0.908203125, "reward_std": 0.19030889868736267, "rewards/accuracy_reward_step": 0.4140625, "rewards/format_reward_step": 0.98828125, "step": 131 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5115197022205109, "calib/avg_num_step_conf": 5.26953125, "calib/ece": 0.29276679841897235, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.30039525691699603, "calib/gap": 0.002339237581825304, "calib/mean_conf": 0.8712648221343874, "calib/mu_c": 0.8722448979591837, "calib/mu_w": 0.8699056603773584, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.291501976284585, "calib/std_conf": 0.053847804000900085, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7725869894099849, "calib/step_q_c_n": 661.0, "calib/step_q_gap": 0.04151140801463604, "calib/step_q_w": 0.7310755813953489, "calib/step_q_w_n": 688.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 468.3046875, "completions/mean_terminated_length": 468.3046875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.1408, "grad_norm": 0.005643435753881931, "learning_rate": 1.888888888888889e-06, "loss": 0.055, "num_tokens": 29264960.0, "reward": 1.068359375, "reward_std": 0.2384309619665146, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.98828125, "step": 132 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5193548387096774, "calib/avg_num_step_conf": 6.1796875, "calib/ece": 0.48519685039370086, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.28346456692913385, "calib/gap": 0.004450961225154648, "calib/mean_conf": 0.8749606299212599, "calib/mu_c": 0.8776767676767675, "calib/mu_w": 0.8732258064516129, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.48519685039370086, "calib/std_conf": 0.056476237296147466, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7284842883548983, "calib/step_q_c_n": 541.0, "calib/step_q_gap": -0.017836556986119967, "calib/step_q_w": 0.7463208453410183, "calib/step_q_w_n": 1041.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 541.546875, "completions/mean_terminated_length": 541.546875, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.14186666666666667, "grad_norm": 0.0053438725881278515, "learning_rate": 1.8611111111111113e-06, "loss": 0.0547, "num_tokens": 29509940.0, "reward": 0.8828125, "reward_std": 0.28567051887512207, "rewards/accuracy_reward_step": 0.38671875, "rewards/format_reward_step": 0.9921875, "step": 133 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49387676999617297, "calib/avg_num_step_conf": 5.70703125, "calib/ece": 0.3452988047808765, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.30677290836653387, "calib/gap": -0.0009267763745375301, "calib/mean_conf": 0.8791633466135459, "calib/mu_c": 0.878731343283582, "calib/mu_w": 0.8796581196581196, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3452988047808765, "calib/std_conf": 0.05131436984845087, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7596148359486448, "calib/step_q_c_n": 701.0, "calib/step_q_gap": 0.05404904647496056, "calib/step_q_w": 0.7055657894736842, "calib/step_q_w_n": 760.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2605.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 543.71484375, "completions/mean_terminated_length": 545.8471069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.14293333333333333, "grad_norm": 0.005967985838651657, "learning_rate": 1.8333333333333333e-06, "loss": 0.0729, "num_tokens": 29758083.0, "reward": 1.01171875, "reward_std": 0.32466161251068115, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.9765625, "step": 134 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4150937339496661, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.34711999999999993, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.312, "calib/gap": -0.016304571135079438, "calib/mean_conf": 0.87512, "calib/mu_c": 0.8674242424242424, "calib/mu_w": 0.8837288135593219, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.34711999999999993, "calib/std_conf": 0.052262659710351524, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7503671328671329, "calib/step_q_c_n": 572.0, "calib/step_q_gap": 0.00573984735377131, "calib/step_q_w": 0.7446272855133615, "calib/step_q_w_n": 711.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2582.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 504.8828125, "completions/mean_terminated_length": 506.8627624511719, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.144, "grad_norm": 0.0058227465488016605, "learning_rate": 1.8055555555555557e-06, "loss": 0.0757, "num_tokens": 29993213.0, "reward": 1.00390625, "reward_std": 0.2614578604698181, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.9765625, "step": 135 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.507595648912228, "calib/avg_num_step_conf": 5.7109375, "calib/ece": 0.38924901185770755, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3201581027667984, "calib/gap": 0.007725056264066166, "calib/mean_conf": 0.8793675889328064, "calib/mu_c": 0.8833064516129033, "calib/mu_w": 0.8755813953488372, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.38924901185770755, "calib/std_conf": 0.055442838641589665, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7597941176470587, "calib/step_q_c_n": 680.0, "calib/step_q_gap": 0.05562531969309448, "calib/step_q_w": 0.7041687979539643, "calib/step_q_w_n": 782.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2522.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 474.08984375, "completions/mean_terminated_length": 474.08984375, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.14506666666666668, "grad_norm": 0.005732369609177113, "learning_rate": 1.777777777777778e-06, "loss": 0.0432, "num_tokens": 30223068.0, "reward": 0.9765625, "reward_std": 0.25726181268692017, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.984375, "step": 136 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5126005530417295, "calib/avg_num_step_conf": 6.0078125, "calib/ece": 0.3438339920948619, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2885375494071146, "calib/gap": -0.006987179487179751, "calib/mean_conf": 0.8657312252964426, "calib/mu_c": 0.8624999999999999, "calib/mu_w": 0.8694871794871797, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3360079051383401, "calib/std_conf": 0.07381714731970884, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7409504132231405, "calib/step_q_c_n": 726.0, "calib/step_q_gap": 0.031492285144322785, "calib/step_q_w": 0.7094581280788177, "calib/step_q_w_n": 812.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2778.0, "completions/max_terminated_length": 2778.0, "completions/mean_length": 459.9375, "completions/mean_terminated_length": 463.5590515136719, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.14613333333333334, "grad_norm": 0.004957827739417553, "learning_rate": 1.75e-06, "loss": 0.0157, "num_tokens": 30447796.0, "reward": 1.0234375, "reward_std": 0.21421602368354797, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.984375, "step": 137 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5012825445684237, "calib/avg_num_step_conf": 5.49609375, "calib/ece": 0.3270517928286852, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3107569721115538, "calib/gap": 0.003289085545722714, "calib/mean_conf": 0.8768525896414343, "calib/mu_c": 0.8783333333333334, "calib/mu_w": 0.8750442477876107, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3270517928286852, "calib/std_conf": 0.054947042444279365, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7471188251001336, "calib/step_q_c_n": 749.0, "calib/step_q_gap": -0.002957162741811681, "calib/step_q_w": 0.7500759878419453, "calib/step_q_w_n": 658.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 473.421875, "completions/mean_terminated_length": 475.2784729003906, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.1472, "grad_norm": 0.006393305957317352, "learning_rate": 1.7222222222222224e-06, "loss": 0.0518, "num_tokens": 30673328.0, "reward": 1.029296875, "reward_std": 0.34001946449279785, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.98046875, "step": 138 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5215581426830919, "calib/avg_num_step_conf": 5.0859375, "calib/ece": 0.22631372549019618, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.27450980392156865, "calib/gap": 0.0017117909841616319, "calib/mean_conf": 0.8772941176470588, "calib/mu_c": 0.8778915662650604, "calib/mu_w": 0.8761797752808987, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22631372549019618, "calib/std_conf": 0.05181539407779448, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7570445859872611, "calib/step_q_c_n": 785.0, "calib/step_q_gap": 0.003988493143934235, "calib/step_q_w": 0.7530560928433269, "calib/step_q_w_n": 517.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2453.0, "completions/max_terminated_length": 2453.0, "completions/mean_length": 426.875, "completions/mean_terminated_length": 426.875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.14826666666666666, "grad_norm": 0.0056905848905444145, "learning_rate": 1.6944444444444446e-06, "loss": 0.0293, "num_tokens": 30885704.0, "reward": 1.146484375, "reward_std": 0.21988742053508759, "rewards/accuracy_reward_step": 0.6484375, "rewards/format_reward_step": 0.99609375, "step": 139 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5948792071030353, "calib/avg_num_step_conf": 4.7578125, "calib/ece": 0.22039370078740164, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2874015748031496, "calib/gap": 0.01896895863445558, "calib/mean_conf": 0.8778740157480315, "calib/mu_c": 0.8843712574850299, "calib/mu_w": 0.8654022988505743, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22039370078740164, "calib/std_conf": 0.05324331197155695, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7770304568527918, "calib/step_q_c_n": 788.0, "calib/step_q_gap": 0.021379294062094267, "calib/step_q_w": 0.7556511627906976, "calib/step_q_w_n": 430.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 429.625, "completions/mean_terminated_length": 429.625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.14933333333333335, "grad_norm": 0.00584077462553978, "learning_rate": 1.6666666666666667e-06, "loss": 0.0172, "num_tokens": 31100704.0, "reward": 1.146484375, "reward_std": 0.2521313428878784, "rewards/accuracy_reward_step": 0.65234375, "rewards/format_reward_step": 0.98828125, "step": 140 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.590432098765432, "calib/avg_num_step_conf": 4.83984375, "calib/ece": 0.24349206349206354, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.32936507936507936, "calib/gap": 0.015925925925925788, "calib/mean_conf": 0.8863492063492064, "calib/mu_c": 0.892037037037037, "calib/mu_w": 0.8761111111111112, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.24349206349206354, "calib/std_conf": 0.046103802387744704, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7677184466019417, "calib/step_q_c_n": 824.0, "calib/step_q_gap": 0.0450437478067609, "calib/step_q_w": 0.7226746987951808, "calib/step_q_w_n": 415.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2650.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 484.71875, "completions/mean_terminated_length": 484.71875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.1504, "grad_norm": 0.005316486582159996, "learning_rate": 1.638888888888889e-06, "loss": 0.0347, "num_tokens": 31331888.0, "reward": 1.12109375, "reward_std": 0.24240916967391968, "rewards/accuracy_reward_step": 0.6328125, "rewards/format_reward_step": 0.9765625, "step": 141 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5043939002326182, "calib/avg_num_step_conf": 5.578125, "calib/ece": 0.3069444444444446, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.34523809523809523, "calib/gap": 0.0011592142672524064, "calib/mean_conf": 0.8836904761904761, "calib/mu_c": 0.8841780821917806, "calib/mu_w": 0.8830188679245282, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3056349206349207, "calib/std_conf": 0.04964428455182058, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7646995994659546, "calib/step_q_c_n": 749.0, "calib/step_q_gap": 0.020855711395262322, "calib/step_q_w": 0.7438438880706922, "calib/step_q_w_n": 679.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 493.76953125, "completions/mean_terminated_length": 495.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.15146666666666667, "grad_norm": 0.004341771826148033, "learning_rate": 1.6111111111111113e-06, "loss": 0.054, "num_tokens": 31563453.0, "reward": 1.060546875, "reward_std": 0.21372465789318085, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.98046875, "step": 142 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4680171964564877, "calib/avg_num_step_conf": 5.47265625, "calib/ece": 0.2893280632411067, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.31620553359683795, "calib/gap": -0.013604090672225122, "calib/mean_conf": 0.8774703557312253, "calib/mu_c": 0.8720394736842106, "calib/mu_w": 0.8856435643564358, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.28300395256916994, "calib/std_conf": 0.07470577406418852, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7722969543147208, "calib/step_q_c_n": 788.0, "calib/step_q_gap": 0.03821865088894594, "calib/step_q_w": 0.7340783034257748, "calib/step_q_w_n": 613.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2137.0, "completions/max_terminated_length": 2137.0, "completions/mean_length": 479.63671875, "completions/mean_terminated_length": 481.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.15253333333333333, "grad_norm": 0.005102099850773811, "learning_rate": 1.5833333333333333e-06, "loss": -0.0289, "num_tokens": 31793576.0, "reward": 1.080078125, "reward_std": 0.18172350525856018, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.97265625, "step": 143 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.41367079889807157, "calib/avg_num_step_conf": 4.70703125, "calib/ece": 0.23885375494071143, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.34782608695652173, "calib/gap": -0.01407575757575752, "calib/mean_conf": 0.8878656126482213, "calib/mu_c": 0.882969696969697, "calib/mu_w": 0.8970454545454545, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23727272727272725, "calib/std_conf": 0.04735900358614075, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7752658064516129, "calib/step_q_c_n": 775.0, "calib/step_q_gap": -0.012548147036759105, "calib/step_q_w": 0.787813953488372, "calib/step_q_w_n": 430.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2418.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 463.890625, "completions/mean_terminated_length": 465.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.1536, "grad_norm": 0.00613579573109746, "learning_rate": 1.5555555555555558e-06, "loss": 0.0353, "num_tokens": 32016460.0, "reward": 1.138671875, "reward_std": 0.2852437198162079, "rewards/accuracy_reward_step": 0.64453125, "rewards/format_reward_step": 0.98828125, "step": 144 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5328118209474142, "calib/avg_num_step_conf": 5.17578125, "calib/ece": 0.1929803921568627, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.403921568627451, "calib/gap": 0.006162538026944886, "calib/mean_conf": 0.8870980392156863, "calib/mu_c": 0.8889830508474575, "calib/mu_w": 0.8828205128205127, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1929803921568627, "calib/std_conf": 0.048630360444270194, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7572655367231639, "calib/step_q_c_n": 885.0, "calib/step_q_gap": -0.01821173600410897, "calib/step_q_w": 0.7754772727272728, "calib/step_q_w_n": 440.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2712.0, "completions/max_terminated_length": 2712.0, "completions/mean_length": 447.33984375, "completions/mean_terminated_length": 447.33984375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.15466666666666667, "grad_norm": 0.0053616659715771675, "learning_rate": 1.527777777777778e-06, "loss": 0.0454, "num_tokens": 32233683.0, "reward": 1.189453125, "reward_std": 0.24315764009952545, "rewards/accuracy_reward_step": 0.69140625, "rewards/format_reward_step": 0.99609375, "step": 145 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4438788577282623, "calib/avg_num_step_conf": 4.47265625, "calib/ece": 0.4364940239043825, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.3426294820717131, "calib/gap": -0.010026251760788751, "calib/mean_conf": 0.8874900398406375, "calib/mu_c": 0.8820175438596493, "calib/mu_w": 0.892043795620438, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.4349003984063745, "calib/std_conf": 0.04589606850245974, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7788316831683167, "calib/step_q_c_n": 505.0, "calib/step_q_gap": 0.033253558168316766, "calib/step_q_w": 0.745578125, "calib/step_q_w_n": 640.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 451.91796875, "completions/mean_terminated_length": 455.47637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.15573333333333333, "grad_norm": 0.0055298893712460995, "learning_rate": 1.5e-06, "loss": 0.0213, "num_tokens": 32456590.0, "reward": 0.93359375, "reward_std": 0.22286482155323029, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.9765625, "step": 146 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47230100207978826, "calib/avg_num_step_conf": 5.1328125, "calib/ece": 0.41333333333333333, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.4365079365079365, "calib/gap": -0.014029117035356364, "calib/mean_conf": 0.8855555555555554, "calib/mu_c": 0.8783739837398374, "calib/mu_w": 0.8924031007751938, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4053968253968254, "calib/std_conf": 0.09434869095545957, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7628910614525138, "calib/step_q_c_n": 716.0, "calib/step_q_gap": 0.0005164795127144961, "calib/step_q_w": 0.7623745819397993, "calib/step_q_w_n": 598.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2790.0, "completions/max_terminated_length": 2790.0, "completions/mean_length": 482.1640625, "completions/mean_terminated_length": 485.96063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.1568, "grad_norm": 0.0051113637164235115, "learning_rate": 1.4722222222222225e-06, "loss": -0.0019, "num_tokens": 32683704.0, "reward": 0.970703125, "reward_std": 0.2150426059961319, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.98046875, "step": 147 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.41023061445596654, "calib/avg_num_step_conf": 4.71484375, "calib/ece": 0.1826482213438736, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3675889328063241, "calib/gap": -0.01767373471598821, "calib/mean_conf": 0.883201581027668, "calib/mu_c": 0.8782417582417582, "calib/mu_w": 0.8959154929577464, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.17324110671936765, "calib/std_conf": 0.055343223691501256, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7595911214953271, "calib/step_q_c_n": 856.0, "calib/step_q_gap": 0.004690836595042325, "calib/step_q_w": 0.7549002849002848, "calib/step_q_w_n": 351.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2324.0, "completions/max_terminated_length": 2324.0, "completions/mean_length": 454.93359375, "completions/mean_terminated_length": 454.93359375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.15786666666666666, "grad_norm": 0.005389039404690266, "learning_rate": 1.4444444444444445e-06, "loss": 0.0286, "num_tokens": 32905279.0, "reward": 1.203125, "reward_std": 0.22485414147377014, "rewards/accuracy_reward_step": 0.7109375, "rewards/format_reward_step": 0.984375, "step": 148 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.45914851728805217, "calib/avg_num_step_conf": 5.22265625, "calib/ece": 0.39027450980392164, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.3607843137254902, "calib/gap": -0.003661867847914335, "calib/mean_conf": 0.884392156862745, "calib/mu_c": 0.8825396825396825, "calib/mu_w": 0.8862015503875968, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39027450980392164, "calib/std_conf": 0.05129234871327704, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7555536912751678, "calib/step_q_c_n": 596.0, "calib/step_q_gap": 0.014028725013359389, "calib/step_q_w": 0.7415249662618084, "calib/step_q_w_n": 741.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2449.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 514.65234375, "completions/mean_terminated_length": 514.65234375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.15893333333333334, "grad_norm": 0.00547590758651495, "learning_rate": 1.4166666666666667e-06, "loss": 0.0431, "num_tokens": 33141486.0, "reward": 0.990234375, "reward_std": 0.24513350427150726, "rewards/accuracy_reward_step": 0.4921875, "rewards/format_reward_step": 0.99609375, "step": 149 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5161447505197505, "calib/avg_num_step_conf": 5.109375, "calib/ece": 0.3001587301587302, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.39285714285714285, "calib/gap": 0.00910083160083175, "calib/mean_conf": 0.8874603174603175, "calib/mu_c": 0.8912162162162163, "calib/mu_w": 0.8821153846153845, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3001587301587302, "calib/std_conf": 0.05608859924243593, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.778169398907104, "calib/step_q_c_n": 732.0, "calib/step_q_gap": 0.019957593351548453, "calib/step_q_w": 0.7582118055555556, "calib/step_q_w_n": 576.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 446.85546875, "completions/mean_terminated_length": 448.6078796386719, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.16, "grad_norm": 0.0062665254808962345, "learning_rate": 1.3888888888888892e-06, "loss": 0.0104, "num_tokens": 33360841.0, "reward": 1.06640625, "reward_std": 0.3003883957862854, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9765625, "step": 150 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4230597680642284, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.42529880478087667, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.46613545816733065, "calib/gap": -0.011984834968778002, "calib/mean_conf": 0.8954183266932271, "calib/mu_c": 0.8890677966101694, "calib/mu_w": 0.9010526315789474, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.42529880478087667, "calib/std_conf": 0.05261417392298819, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7676481481481481, "calib/step_q_c_n": 540.0, "calib/step_q_gap": 0.006076062258577597, "calib/step_q_w": 0.7615720858895705, "calib/step_q_w_n": 652.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 503.8828125, "completions/mean_terminated_length": 503.8828125, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.16106666666666666, "grad_norm": 0.005633654538542032, "learning_rate": 1.3611111111111112e-06, "loss": 0.0533, "num_tokens": 33596859.0, "reward": 0.94921875, "reward_std": 0.2380410134792328, "rewards/accuracy_reward_step": 0.4609375, "rewards/format_reward_step": 0.9765625, "step": 151 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4950817536316853, "calib/avg_num_step_conf": 5.109375, "calib/ece": 0.37423387096774186, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.40725806451612906, "calib/gap": -0.0002742492345777503, "calib/mean_conf": 0.8943951612903226, "calib/mu_c": 0.8942635658914727, "calib/mu_w": 0.8945378151260505, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.37423387096774186, "calib/std_conf": 0.04693463514918262, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7593402777777778, "calib/step_q_c_n": 576.0, "calib/step_q_gap": 0.010323884335154854, "calib/step_q_w": 0.7490163934426229, "calib/step_q_w_n": 732.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2812.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 480.80859375, "completions/mean_terminated_length": 482.69415283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.16213333333333332, "grad_norm": 0.0060722678899765015, "learning_rate": 1.3333333333333334e-06, "loss": 0.03, "num_tokens": 33825338.0, "reward": 0.984375, "reward_std": 0.27242544293403625, "rewards/accuracy_reward_step": 0.50390625, "rewards/format_reward_step": 0.9609375, "step": 152 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4812564968814969, "calib/avg_num_step_conf": 4.9921875, "calib/ece": 0.30067460317460315, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.38492063492063494, "calib/gap": -0.003783783783783745, "calib/mean_conf": 0.8852777777777778, "calib/mu_c": 0.8837162162162163, "calib/mu_w": 0.8875000000000001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.29932539682539683, "calib/std_conf": 0.04716908775359317, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7728005865102638, "calib/step_q_c_n": 682.0, "calib/step_q_gap": 0.029981794563955222, "calib/step_q_w": 0.7428187919463086, "calib/step_q_w_n": 596.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 478.51171875, "completions/mean_terminated_length": 480.3882751464844, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.1632, "grad_norm": 0.005258821416646242, "learning_rate": 1.3055555555555556e-06, "loss": 0.0326, "num_tokens": 34055157.0, "reward": 1.068359375, "reward_std": 0.23543894290924072, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.98046875, "step": 153 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4706623810424151, "calib/avg_num_step_conf": 4.31640625, "calib/ece": 0.3987698412698413, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.3531746031746032, "calib/gap": -0.004539610512384051, "calib/mean_conf": 0.8830555555555556, "calib/mu_c": 0.8807317073170733, "calib/mu_w": 0.8852713178294573, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.39686507936507937, "calib/std_conf": 0.05169919184464851, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7663198458574182, "calib/step_q_c_n": 519.0, "calib/step_q_gap": 0.030193565993936966, "calib/step_q_w": 0.7361262798634812, "calib/step_q_w_n": 586.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 450.375, "completions/mean_terminated_length": 452.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.16426666666666667, "grad_norm": 0.00625656358897686, "learning_rate": 1.2777777777777779e-06, "loss": 0.0556, "num_tokens": 34274893.0, "reward": 0.96875, "reward_std": 0.26861146092414856, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.9765625, "step": 154 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49847333000997013, "calib/avg_num_step_conf": 4.8359375, "calib/ece": 0.34374015748031495, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3228346456692913, "calib/gap": 0.004805583250249157, "calib/mean_conf": 0.8757086614173228, "calib/mu_c": 0.8779411764705882, "calib/mu_w": 0.8731355932203391, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.342007874015748, "calib/std_conf": 0.08050584272201257, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.719593984962406, "calib/step_q_c_n": 665.0, "calib/step_q_gap": -0.028154706137070495, "calib/step_q_w": 0.7477486910994765, "calib/step_q_w_n": 573.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 878.0, "completions/max_terminated_length": 878.0, "completions/mean_length": 428.171875, "completions/mean_terminated_length": 429.85101318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.16533333333333333, "grad_norm": 0.006096397992223501, "learning_rate": 1.25e-06, "loss": -0.0035, "num_tokens": 34491721.0, "reward": 1.0234375, "reward_std": 0.2608669400215149, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.984375, "step": 155 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5112410071942446, "calib/avg_num_step_conf": 5.046875, "calib/ece": 0.3293227091633467, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3745019920318725, "calib/gap": 0.009480986639260114, "calib/mean_conf": 0.8831075697211156, "calib/mu_c": 0.8873381294964029, "calib/mu_w": 0.8778571428571428, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3293227091633467, "calib/std_conf": 0.06063988456510979, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7347422680412372, "calib/step_q_c_n": 679.0, "calib/step_q_gap": -0.008879265400850933, "calib/step_q_w": 0.7436215334420881, "calib/step_q_w_n": 613.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 496.66015625, "completions/mean_terminated_length": 496.66015625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.1664, "grad_norm": 0.005818387493491173, "learning_rate": 1.2222222222222223e-06, "loss": 0.0341, "num_tokens": 34723626.0, "reward": 1.029296875, "reward_std": 0.2707667052745819, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.97265625, "step": 156 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4583667200854701, "calib/avg_num_step_conf": 5.09375, "calib/ece": 0.2544444444444445, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2896825396825397, "calib/gap": 0.006145833333333295, "calib/mean_conf": 0.8734920634920635, "calib/mu_c": 0.8758333333333334, "calib/mu_w": 0.8696875000000001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2544444444444445, "calib/std_conf": 0.07091037782330305, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7502247191011236, "calib/step_q_c_n": 801.0, "calib/step_q_gap": 0.003962293653807514, "calib/step_q_w": 0.7462624254473161, "calib/step_q_w_n": 503.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2289.0, "completions/max_terminated_length": 2289.0, "completions/mean_length": 470.64453125, "completions/mean_terminated_length": 474.35040283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.16746666666666668, "grad_norm": 0.006078007165342569, "learning_rate": 1.1944444444444446e-06, "loss": -0.01, "num_tokens": 34947839.0, "reward": 1.1015625, "reward_std": 0.28636425733566284, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.9765625, "step": 157 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5060596026490066, "calib/avg_num_step_conf": 4.87109375, "calib/ece": 0.2841832669322709, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.3545816733067729, "calib/gap": 0.001956291390728504, "calib/mean_conf": 0.8857768924302788, "calib/mu_c": 0.8865562913907286, "calib/mu_w": 0.8846, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2841832669322709, "calib/std_conf": 0.046095497009212054, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7688340192043897, "calib/step_q_c_n": 729.0, "calib/step_q_gap": 0.04169116206153256, "calib/step_q_w": 0.7271428571428571, "calib/step_q_w_n": 518.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2510.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 497.6953125, "completions/mean_terminated_length": 497.6953125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.16853333333333334, "grad_norm": 0.006711493246257305, "learning_rate": 1.1666666666666668e-06, "loss": 0.1127, "num_tokens": 35180489.0, "reward": 1.07421875, "reward_std": 0.2854154706001282, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.96875, "step": 158 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4503373313343328, "calib/avg_num_step_conf": 4.3515625, "calib/ece": 0.3520866141732284, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.39763779527559057, "calib/gap": -0.008489505247376239, "calib/mean_conf": 0.8890944881889764, "calib/mu_c": 0.885217391304348, "calib/mu_w": 0.8937068965517242, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.34893700787401577, "calib/std_conf": 0.04508486015603499, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.767580919931857, "calib/step_q_c_n": 587.0, "calib/step_q_gap": 0.0019452463075685378, "calib/step_q_w": 0.7656356736242884, "calib/step_q_w_n": 527.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 436.96875, "completions/mean_terminated_length": 436.96875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.1696, "grad_norm": 0.007543331012129784, "learning_rate": 1.138888888888889e-06, "loss": 0.0626, "num_tokens": 35397137.0, "reward": 1.029296875, "reward_std": 0.3060174286365509, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.98046875, "step": 159 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5002294329727959, "calib/avg_num_step_conf": 4.4453125, "calib/ece": 0.3379435483870967, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.3225806451612903, "calib/gap": -0.0024431333988854664, "calib/mean_conf": 0.8822983870967742, "calib/mu_c": 0.8811851851851853, "calib/mu_w": 0.8836283185840708, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3379435483870967, "calib/std_conf": 0.0479915113190746, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7541725352112676, "calib/step_q_c_n": 568.0, "calib/step_q_gap": 0.012453236965653591, "calib/step_q_w": 0.741719298245614, "calib/step_q_w_n": 570.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 490.06640625, "completions/mean_terminated_length": 493.9252014160156, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.17066666666666666, "grad_norm": 0.0057596382685005665, "learning_rate": 1.111111111111111e-06, "loss": 0.0433, "num_tokens": 35627434.0, "reward": 1.0078125, "reward_std": 0.289905846118927, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.9609375, "step": 160 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.46350392512077293, "calib/avg_num_step_conf": 4.6328125, "calib/ece": 0.16726562500000008, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.37109375, "calib/gap": -0.00421497584541064, "calib/mean_conf": 0.883359375, "calib/mu_c": 0.8821739130434783, "calib/mu_w": 0.8863888888888889, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1659375000000001, "calib/std_conf": 0.05558323352962992, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7384870848708487, "calib/step_q_c_n": 813.0, "calib/step_q_gap": -0.02151291512915121, "calib/step_q_w": 0.7599999999999999, "calib/step_q_w_n": 373.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2291.0, "completions/max_terminated_length": 2291.0, "completions/mean_length": 427.3203125, "completions/mean_terminated_length": 428.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.17173333333333332, "grad_norm": 0.005399015732109547, "learning_rate": 1.0833333333333335e-06, "loss": 0.0224, "num_tokens": 35840748.0, "reward": 1.216796875, "reward_std": 0.19859883189201355, "rewards/accuracy_reward_step": 0.71875, "rewards/format_reward_step": 0.99609375, "step": 161 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5375821467688937, "calib/avg_num_step_conf": 4.3125, "calib/ece": 0.2282677165354331, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3188976377952756, "calib/gap": 0.0099000547645125, "calib/mean_conf": 0.8818110236220473, "calib/mu_c": 0.8852409638554216, "calib/mu_w": 0.8753409090909091, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2282677165354331, "calib/std_conf": 0.0501009582763847, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7815418502202643, "calib/step_q_c_n": 681.0, "calib/step_q_gap": 0.026459107903479495, "calib/step_q_w": 0.7550827423167848, "calib/step_q_w_n": 423.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2126.0, "completions/max_terminated_length": 2126.0, "completions/mean_length": 413.90625, "completions/mean_terminated_length": 413.90625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.1728, "grad_norm": 0.005997710861265659, "learning_rate": 1.0555555555555557e-06, "loss": 0.0414, "num_tokens": 36050852.0, "reward": 1.142578125, "reward_std": 0.2104301154613495, "rewards/accuracy_reward_step": 0.6484375, "rewards/format_reward_step": 0.98828125, "step": 162 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4081221572449643, "calib/avg_num_step_conf": 5.125, "calib/ece": 0.34650602409638553, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.40160642570281124, "calib/gap": -0.01474074074074061, "calib/mean_conf": 0.8886746987951807, "calib/mu_c": 0.8819259259259259, "calib/mu_w": 0.8966666666666665, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.34650602409638553, "calib/std_conf": 0.04568367976418741, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7717492711370262, "calib/step_q_c_n": 686.0, "calib/step_q_gap": 0.03189304110507729, "calib/step_q_w": 0.7398562300319489, "calib/step_q_w_n": 626.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2585.0, "completions/max_terminated_length": 2585.0, "completions/mean_length": 520.79296875, "completions/mean_terminated_length": 520.79296875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.17386666666666667, "grad_norm": 0.00483884708955884, "learning_rate": 1.0277777777777777e-06, "loss": 0.09, "num_tokens": 36289007.0, "reward": 1.013671875, "reward_std": 0.21470002830028534, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.97265625, "step": 163 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.46222222222222226, "calib/avg_num_step_conf": 4.8828125, "calib/ece": 0.34212, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.304, "calib/gap": -0.005896940418679386, "calib/mean_conf": 0.88212, "calib/mu_c": 0.8794074074074074, "calib/mu_w": 0.8853043478260868, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.34212, "calib/std_conf": 0.044306947536475585, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7618051118210863, "calib/step_q_c_n": 626.0, "calib/step_q_gap": 0.016949342590317107, "calib/step_q_w": 0.7448557692307692, "calib/step_q_w_n": 624.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2787.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 536.5625, "completions/mean_terminated_length": 536.5625, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.17493333333333333, "grad_norm": 0.005693615879863501, "learning_rate": 1.0000000000000002e-06, "loss": 0.0656, "num_tokens": 36532503.0, "reward": 1.013671875, "reward_std": 0.23496907949447632, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.97265625, "step": 164 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.508505315822389, "calib/avg_num_step_conf": 4.3359375, "calib/ece": 0.3952964426877471, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.35968379446640314, "calib/gap": 0.0014258911819887699, "calib/mean_conf": 0.8814624505928854, "calib/mu_c": 0.8821951219512196, "calib/mu_w": 0.8807692307692309, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3952964426877471, "calib/std_conf": 0.051197474511602, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7888933601609658, "calib/step_q_c_n": 497.0, "calib/step_q_gap": 0.027278352004358908, "calib/step_q_w": 0.7616150081566069, "calib/step_q_w_n": 613.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1421.0, "completions/max_terminated_length": 1421.0, "completions/mean_length": 478.30078125, "completions/mean_terminated_length": 482.0669250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.176, "grad_norm": 0.006250341422855854, "learning_rate": 9.722222222222224e-07, "loss": -0.0322, "num_tokens": 36760524.0, "reward": 0.96875, "reward_std": 0.25991344451904297, "rewards/accuracy_reward_step": 0.48046875, "rewards/format_reward_step": 0.9765625, "step": 165 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4818796068796068, "calib/avg_num_step_conf": 4.625, "calib/ece": 0.18104, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.324, "calib/gap": 0.0003578009828011375, "calib/mean_conf": 0.88336, "calib/mu_c": 0.8834659090909092, "calib/mu_w": 0.883108108108108, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.1802, "calib/std_conf": 0.051829628592147944, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.782295673076923, "calib/step_q_c_n": 832.0, "calib/step_q_gap": 0.004539991258741272, "calib/step_q_w": 0.7777556818181818, "calib/step_q_w_n": 352.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2726.0, "completions/max_terminated_length": 2726.0, "completions/mean_length": 509.8828125, "completions/mean_terminated_length": 509.8828125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.17706666666666668, "grad_norm": 0.00509639410302043, "learning_rate": 9.444444444444445e-07, "loss": 0.0401, "num_tokens": 36997238.0, "reward": 1.1640625, "reward_std": 0.2525672912597656, "rewards/accuracy_reward_step": 0.6875, "rewards/format_reward_step": 0.953125, "step": 166 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47980446728038256, "calib/avg_num_step_conf": 4.55078125, "calib/ece": 0.19767716535433077, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2874015748031496, "calib/gap": -0.004148290872760918, "calib/mean_conf": 0.8787795275590552, "calib/mu_c": 0.877456647398844, "calib/mu_w": 0.8816049382716049, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19767716535433077, "calib/std_conf": 0.04546875943784006, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7828692493946733, "calib/step_q_c_n": 826.0, "calib/step_q_gap": 0.02437367417343439, "calib/step_q_w": 0.7584955752212389, "calib/step_q_w_n": 339.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1583.0, "completions/max_terminated_length": 1583.0, "completions/mean_length": 444.015625, "completions/mean_terminated_length": 444.015625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.17813333333333334, "grad_norm": 0.005671403370797634, "learning_rate": 9.166666666666666e-07, "loss": 0.0097, "num_tokens": 37216514.0, "reward": 1.173828125, "reward_std": 0.2115381509065628, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.98828125, "step": 167 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48184084630415747, "calib/avg_num_step_conf": 4.67578125, "calib/ece": 0.2668800000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.344, "calib/gap": -0.0004824472744423547, "calib/mean_conf": 0.8788800000000001, "calib/mu_c": 0.8786928104575165, "calib/mu_w": 0.8791752577319588, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2668800000000001, "calib/std_conf": 0.0542028191148763, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7740029985007496, "calib/step_q_c_n": 667.0, "calib/step_q_gap": 0.09147469661395724, "calib/step_q_w": 0.6825283018867924, "calib/step_q_w_n": 530.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1516.0, "completions/max_terminated_length": 1516.0, "completions/mean_length": 486.69921875, "completions/mean_terminated_length": 490.531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.1792, "grad_norm": 0.005674854852259159, "learning_rate": 8.88888888888889e-07, "loss": -0.0191, "num_tokens": 37445781.0, "reward": 1.08203125, "reward_std": 0.29917871952056885, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.96875, "step": 168 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.42087472406181015, "calib/avg_num_step_conf": 4.07421875, "calib/ece": 0.27582995951417005, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.3441295546558704, "calib/gap": -0.012168184326710851, "calib/mean_conf": 0.884331983805668, "calib/mu_c": 0.8796026490066224, "calib/mu_w": 0.8917708333333333, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.274412955465587, "calib/std_conf": 0.05110379916187314, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.7932115677321159, "calib/step_q_c_n": 657.0, "calib/step_q_gap": 0.02049136047822986, "calib/step_q_w": 0.772720207253886, "calib/step_q_w_n": 386.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2452.0, "completions/max_terminated_length": 2452.0, "completions/mean_length": 460.453125, "completions/mean_terminated_length": 464.0787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.18026666666666666, "grad_norm": 0.005321961361914873, "learning_rate": 8.611111111111112e-07, "loss": 0.005, "num_tokens": 37667841.0, "reward": 1.064453125, "reward_std": 0.213637113571167, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.94921875, "step": 169 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5366047745358091, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.3043373493975903, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.3815261044176707, "calib/gap": 0.008641246684350179, "calib/mean_conf": 0.8866666666666666, "calib/mu_c": 0.8902758620689656, "calib/mu_w": 0.8816346153846154, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3043373493975903, "calib/std_conf": 0.046926984784205795, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7943703703703704, "calib/step_q_c_n": 675.0, "calib/step_q_gap": 0.03692046144505168, "calib/step_q_w": 0.7574499089253187, "calib/step_q_w_n": 549.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2410.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 479.66796875, "completions/mean_terminated_length": 485.3557434082031, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.18133333333333335, "grad_norm": 0.005628409795463085, "learning_rate": 8.333333333333333e-07, "loss": -0.0135, "num_tokens": 37894788.0, "reward": 1.0546875, "reward_std": 0.22039489448070526, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.96875, "step": 170 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5163370326740654, "calib/avg_num_step_conf": 4.51953125, "calib/ece": 0.3821653543307087, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.28346456692913385, "calib/gap": 0.0019685039370079815, "calib/mean_conf": 0.8821653543307086, "calib/mu_c": 0.8831496062992126, "calib/mu_w": 0.8811811023622046, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3821653543307087, "calib/std_conf": 0.044761355622433004, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7756476964769647, "calib/step_q_c_n": 615.0, "calib/step_q_gap": 0.018378323783237782, "calib/step_q_w": 0.757269372693727, "calib/step_q_w_n": 542.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 481.66796875, "completions/mean_terminated_length": 481.66796875, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.1824, "grad_norm": 0.00556573923677206, "learning_rate": 8.055555555555557e-07, "loss": 0.0295, "num_tokens": 38124991.0, "reward": 0.990234375, "reward_std": 0.2234094738960266, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.98828125, "step": 171 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5799045915324985, "calib/avg_num_step_conf": 4.5703125, "calib/ece": 0.19524000000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.312, "calib/gap": 0.011417710196780284, "calib/mean_conf": 0.88324, "calib/mu_c": 0.8868023255813955, "calib/mu_w": 0.8753846153846152, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19524000000000002, "calib/std_conf": 0.04096220697179292, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7867217280813216, "calib/step_q_c_n": 787.0, "calib/step_q_gap": 0.006538960457300624, "calib/step_q_w": 0.780182767624021, "calib/step_q_w_n": 383.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2475.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 464.640625, "completions/mean_terminated_length": 464.640625, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.18346666666666667, "grad_norm": 0.005885588005185127, "learning_rate": 7.777777777777779e-07, "loss": 0.0429, "num_tokens": 38347291.0, "reward": 1.16015625, "reward_std": 0.24455931782722473, "rewards/accuracy_reward_step": 0.671875, "rewards/format_reward_step": 0.9765625, "step": 172 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4641386410432395, "calib/avg_num_step_conf": 4.94921875, "calib/ece": 0.2740562248995985, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.357429718875502, "calib/gap": -0.00984694577899814, "calib/mean_conf": 0.8861044176706827, "calib/mu_c": 0.8823870967741936, "calib/mu_w": 0.8922340425531917, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.26883534136546194, "calib/std_conf": 0.04761463803899582, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7915049751243782, "calib/step_q_c_n": 804.0, "calib/step_q_gap": -0.00011489528598884746, "calib/step_q_w": 0.7916198704103671, "calib/step_q_w_n": 463.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1610.0, "completions/max_terminated_length": 1610.0, "completions/mean_length": 512.10546875, "completions/mean_terminated_length": 514.11376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.18453333333333333, "grad_norm": 0.005655062850564718, "learning_rate": 7.5e-07, "loss": 0.0318, "num_tokens": 38581550.0, "reward": 1.091796875, "reward_std": 0.23605836927890778, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.97265625, "step": 173 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4939894815927873, "calib/avg_num_step_conf": 4.7109375, "calib/ece": 0.38590909090909087, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.32644628099173556, "calib/gap": 0.004710743801653039, "calib/mean_conf": 0.885909090909091, "calib/mu_c": 0.8882644628099174, "calib/mu_w": 0.8835537190082644, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.38590909090909087, "calib/std_conf": 0.05268219075874214, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7820869565217391, "calib/step_q_c_n": 575.0, "calib/step_q_gap": -0.00025852683800731135, "calib/step_q_w": 0.7823454833597464, "calib/step_q_w_n": 631.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 597.12890625, "completions/mean_terminated_length": 597.12890625, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.1856, "grad_norm": 0.0053953444585204124, "learning_rate": 7.222222222222222e-07, "loss": 0.0696, "num_tokens": 38838647.0, "reward": 0.94921875, "reward_std": 0.33002573251724243, "rewards/accuracy_reward_step": 0.4765625, "rewards/format_reward_step": 0.9453125, "step": 174 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4530963001836788, "calib/avg_num_step_conf": 5.26953125, "calib/ece": 0.4718326693227094, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3187250996015936, "calib/gap": -0.004855024927840179, "calib/mean_conf": 0.8795617529880477, "calib/mu_c": 0.8766990291262139, "calib/mu_w": 0.8815540540540541, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4705179282868528, "calib/std_conf": 0.05811966611190145, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7687124463519314, "calib/step_q_c_n": 466.0, "calib/step_q_gap": 0.03280078157276933, "calib/step_q_w": 0.735911664779162, "calib/step_q_w_n": 883.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2460.0, "completions/max_terminated_length": 2460.0, "completions/mean_length": 524.8984375, "completions/mean_terminated_length": 526.9569091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.18666666666666668, "grad_norm": 0.005566365551203489, "learning_rate": 6.944444444444446e-07, "loss": 0.0265, "num_tokens": 39078845.0, "reward": 0.888671875, "reward_std": 0.26503777503967285, "rewards/accuracy_reward_step": 0.40234375, "rewards/format_reward_step": 0.97265625, "step": 175 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4482153846153846, "calib/avg_num_step_conf": 4.9453125, "calib/ece": 0.38505882352941173, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.28627450980392155, "calib/gap": 0.00020615384615396692, "calib/mean_conf": 0.8752549019607843, "calib/mu_c": 0.8753600000000001, "calib/mu_w": 0.8751538461538462, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.38505882352941173, "calib/std_conf": 0.07246252395501805, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7838860971524289, "calib/step_q_c_n": 597.0, "calib/step_q_gap": -0.007773095672683139, "calib/step_q_w": 0.791659192825112, "calib/step_q_w_n": 669.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1648.0, "completions/max_terminated_length": 1648.0, "completions/mean_length": 473.9375, "completions/mean_terminated_length": 475.7961120605469, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.18773333333333334, "grad_norm": 0.005453173536807299, "learning_rate": 6.666666666666667e-07, "loss": -0.0133, "num_tokens": 39304237.0, "reward": 0.978515625, "reward_std": 0.23507677018642426, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.98046875, "step": 176 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.472159090909091, "calib/avg_num_step_conf": 5.16015625, "calib/ece": 0.3151181102362206, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.35826771653543305, "calib/gap": -0.0011994949494948948, "calib/mean_conf": 0.8820472440944882, "calib/mu_c": 0.8815277777777778, "calib/mu_w": 0.8827272727272727, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3151181102362206, "calib/std_conf": 0.04767571862015603, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7901402524544181, "calib/step_q_c_n": 713.0, "calib/step_q_gap": 0.03043630508599704, "calib/step_q_w": 0.7597039473684211, "calib/step_q_w_n": 608.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 487.078125, "completions/mean_terminated_length": 487.078125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.1888, "grad_norm": 0.00511728972196579, "learning_rate": 6.388888888888889e-07, "loss": 0.0236, "num_tokens": 39532761.0, "reward": 1.05859375, "reward_std": 0.22648221254348755, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.9921875, "step": 177 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4563706563706563, "calib/avg_num_step_conf": 4.55859375, "calib/ece": 0.2911857707509883, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.25691699604743085, "calib/gap": -0.00883333333333336, "calib/mean_conf": 0.8761660079051384, "calib/mu_c": 0.8724999999999999, "calib/mu_w": 0.8813333333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2911857707509883, "calib/std_conf": 0.046352363600316075, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7920760233918128, "calib/step_q_c_n": 684.0, "calib/step_q_gap": 0.005471468526388246, "calib/step_q_w": 0.7866045548654246, "calib/step_q_w_n": 483.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2268.0, "completions/max_terminated_length": 2268.0, "completions/mean_length": 440.015625, "completions/mean_terminated_length": 441.7412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.18986666666666666, "grad_norm": 0.006147797219455242, "learning_rate": 6.111111111111112e-07, "loss": 0.0062, "num_tokens": 39751477.0, "reward": 1.076171875, "reward_std": 0.24575549364089966, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.98828125, "step": 178 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.48160511363636366, "calib/avg_num_step_conf": 5.31640625, "calib/ece": 0.23641129032258065, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.2661290322580645, "calib/gap": -0.003903409090908805, "calib/mean_conf": 0.8790725806451613, "calib/mu_c": 0.8776875000000001, "calib/mu_w": 0.8815909090909089, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23516129032258065, "calib/std_conf": 0.04278069985913093, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7918959537572254, "calib/step_q_c_n": 865.0, "calib/step_q_gap": 0.05364998601528981, "calib/step_q_w": 0.7382459677419356, "calib/step_q_w_n": 496.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2973.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 496.18359375, "completions/mean_terminated_length": 500.0905456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.19093333333333334, "grad_norm": 0.0067147728987038136, "learning_rate": 5.833333333333334e-07, "loss": 0.0132, "num_tokens": 39984764.0, "reward": 1.107421875, "reward_std": 0.3205963373184204, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.96484375, "step": 179 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4240544731342277, "calib/avg_num_step_conf": 5.625, "calib/ece": 0.24299212598425196, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.32677165354330706, "calib/gap": -0.009077057911413555, "calib/mean_conf": 0.8847244094488189, "calib/mu_c": 0.8814723926380369, "calib/mu_w": 0.8905494505494504, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.24299212598425196, "calib/std_conf": 0.04676674859617193, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7938191489361701, "calib/step_q_c_n": 940.0, "calib/step_q_gap": 0.010999148936170045, "calib/step_q_w": 0.7828200000000001, "calib/step_q_w_n": 500.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2602.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 564.0390625, "completions/mean_terminated_length": 566.2510375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.192, "grad_norm": 0.00554621359333396, "learning_rate": 5.555555555555555e-07, "loss": -0.0037, "num_tokens": 40233014.0, "reward": 1.12890625, "reward_std": 0.1938880831003189, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.984375, "step": 180 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.578968929224176, "calib/avg_num_step_conf": 4.6875, "calib/ece": 0.3655952380952382, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2777777777777778, "calib/gap": 0.012110039705048092, "calib/mean_conf": 0.8775, "calib/mu_c": 0.8834108527131783, "calib/mu_w": 0.8713008130081302, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3655952380952382, "calib/std_conf": 0.04577051972821017, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8016199376947041, "calib/step_q_c_n": 642.0, "calib/step_q_gap": 0.015024955615851154, "calib/step_q_w": 0.7865949820788529, "calib/step_q_w_n": 558.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2540.0, "completions/max_terminated_length": 2540.0, "completions/mean_length": 478.40234375, "completions/mean_terminated_length": 478.40234375, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.19306666666666666, "grad_norm": 0.006133066024631262, "learning_rate": 5.277777777777779e-07, "loss": 0.0607, "num_tokens": 40461749.0, "reward": 0.99609375, "reward_std": 0.2409520447254181, "rewards/accuracy_reward_step": 0.50390625, "rewards/format_reward_step": 0.984375, "step": 181 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4225753061900033, "calib/avg_num_step_conf": 4.8515625, "calib/ece": 0.2615354330708661, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.3346456692913386, "calib/gap": -0.010688513737173166, "calib/mean_conf": 0.8875196850393701, "calib/mu_c": 0.8835220125786163, "calib/mu_w": 0.8942105263157895, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2615354330708661, "calib/std_conf": 0.042710221239487825, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7977588466579293, "calib/step_q_c_n": 763.0, "calib/step_q_gap": 0.011099138933503228, "calib/step_q_w": 0.7866597077244261, "calib/step_q_w_n": 479.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2375.0, "completions/max_terminated_length": 2375.0, "completions/mean_length": 490.37890625, "completions/mean_terminated_length": 490.37890625, "completions/min_length": 217.0, "completions/min_terminated_length": 217.0, "epoch": 0.19413333333333332, "grad_norm": 0.004917373415082693, "learning_rate": 5.000000000000001e-07, "loss": 0.0375, "num_tokens": 40693446.0, "reward": 1.1171875, "reward_std": 0.21607306599617004, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.9921875, "step": 182 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5172730148687125, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.30925196850393716, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3228346456692913, "calib/gap": 0.012420752926288858, "calib/mean_conf": 0.8801181102362206, "calib/mu_c": 0.8854482758620689, "calib/mu_w": 0.87302752293578, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30925196850393716, "calib/std_conf": 0.05927038194542323, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7826462395543177, "calib/step_q_c_n": 718.0, "calib/step_q_gap": 0.018627890930464353, "calib/step_q_w": 0.7640183486238533, "calib/step_q_w_n": 545.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2744.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 527.984375, "completions/mean_terminated_length": 527.984375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.1952, "grad_norm": 0.006127191707491875, "learning_rate": 4.7222222222222226e-07, "loss": 0.0204, "num_tokens": 40935290.0, "reward": 1.060546875, "reward_std": 0.2705356776714325, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.98828125, "step": 183 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5093615494978478, "calib/avg_num_step_conf": 5.1171875, "calib/ece": 0.21194444444444455, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.36904761904761907, "calib/gap": 0.0013041606886654566, "calib/mean_conf": 0.8839285714285714, "calib/mu_c": 0.8843529411764705, "calib/mu_w": 0.883048780487805, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.21063492063492073, "calib/std_conf": 0.048542679400255695, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7907167630057802, "calib/step_q_c_n": 865.0, "calib/step_q_gap": 0.014334740533870072, "calib/step_q_w": 0.7763820224719101, "calib/step_q_w_n": 445.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2571.0, "completions/max_terminated_length": 2571.0, "completions/mean_length": 497.1640625, "completions/mean_terminated_length": 499.11376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.19626666666666667, "grad_norm": 0.006099920254200697, "learning_rate": 4.444444444444445e-07, "loss": 0.029, "num_tokens": 41167844.0, "reward": 1.15234375, "reward_std": 0.28207772970199585, "rewards/accuracy_reward_step": 0.6640625, "rewards/format_reward_step": 0.9765625, "step": 184 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5506459593183067, "calib/avg_num_step_conf": 5.12109375, "calib/ece": 0.32098765432098775, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.32098765432098764, "calib/gap": 0.011863661352391586, "calib/mean_conf": 0.8806584362139918, "calib/mu_c": 0.8858823529411766, "calib/mu_w": 0.874018691588785, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.32098765432098775, "calib/std_conf": 0.05225352293657308, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7920152671755725, "calib/step_q_c_n": 655.0, "calib/step_q_gap": 0.04745734034630411, "calib/step_q_w": 0.7445579268292684, "calib/step_q_w_n": 656.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 560.9609375, "completions/mean_terminated_length": 567.6126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.19733333333333333, "grad_norm": 0.005671345628798008, "learning_rate": 4.1666666666666667e-07, "loss": 0.0715, "num_tokens": 41418370.0, "reward": 1.005859375, "reward_std": 0.28620392084121704, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.94921875, "step": 185 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.3939333598831807, "calib/avg_num_step_conf": 5.234375, "calib/ece": 0.2565882352941176, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.33725490196078434, "calib/gap": -0.0162206292313819, "calib/mean_conf": 0.8827058823529411, "calib/mu_c": 0.8767901234567903, "calib/mu_w": 0.8930107526881722, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25199999999999995, "calib/std_conf": 0.04411362073130446, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7841351660939291, "calib/step_q_c_n": 873.0, "calib/step_q_gap": 0.002336450890502939, "calib/step_q_w": 0.7817987152034261, "calib/step_q_w_n": 467.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1585.0, "completions/max_terminated_length": 1585.0, "completions/mean_length": 492.640625, "completions/mean_terminated_length": 494.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.1984, "grad_norm": 0.005457146093249321, "learning_rate": 3.8888888888888895e-07, "loss": -0.011, "num_tokens": 41649526.0, "reward": 1.134765625, "reward_std": 0.23541419208049774, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.99609375, "step": 186 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4931908396946565, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.3692968750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.29296875, "calib/gap": -0.0005166412213740701, "calib/mean_conf": 0.8810156250000001, "calib/mu_c": 0.8807633587786259, "calib/mu_w": 0.88128, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3692968750000001, "calib/std_conf": 0.04707606616805801, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7949239999999999, "calib/step_q_c_n": 750.0, "calib/step_q_gap": 0.022532695652173862, "calib/step_q_w": 0.772391304347826, "calib/step_q_w_n": 598.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1139.0, "completions/max_terminated_length": 1139.0, "completions/mean_length": 497.23046875, "completions/mean_terminated_length": 499.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.19946666666666665, "grad_norm": 0.0053702532313764095, "learning_rate": 3.611111111111111e-07, "loss": 0.0113, "num_tokens": 41878361.0, "reward": 1.009765625, "reward_std": 0.26010939478874207, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.99609375, "step": 187 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4917698580489278, "calib/avg_num_step_conf": 5.23046875, "calib/ece": 0.2009236947791166, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3172690763052209, "calib/gap": -0.0022689519782542744, "calib/mean_conf": 0.8862248995983936, "calib/mu_c": 0.8855232558139535, "calib/mu_w": 0.8877922077922078, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19819277108433747, "calib/std_conf": 0.043077002391416236, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7952840909090909, "calib/step_q_c_n": 880.0, "calib/step_q_gap": 0.0007307140027730252, "calib/step_q_w": 0.7945533769063179, "calib/step_q_w_n": 459.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2943.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 548.7421875, "completions/mean_terminated_length": 555.2490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.20053333333333334, "grad_norm": 0.004811764694750309, "learning_rate": 3.3333333333333335e-07, "loss": -0.0023, "num_tokens": 42122911.0, "reward": 1.158203125, "reward_std": 0.26997411251068115, "rewards/accuracy_reward_step": 0.671875, "rewards/format_reward_step": 0.97265625, "step": 188 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.542660054676076, "calib/avg_num_step_conf": 4.82421875, "calib/ece": 0.30200787401574825, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.2755905511811024, "calib/gap": 0.0077519231991860504, "calib/mean_conf": 0.8807480314960632, "calib/mu_c": 0.8840136054421768, "calib/mu_w": 0.8762616822429907, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30200787401574825, "calib/std_conf": 0.0403116732481052, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8018431911966989, "calib/step_q_c_n": 727.0, "calib/step_q_gap": 0.00928413607858869, "calib/step_q_w": 0.7925590551181102, "calib/step_q_w_n": 508.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 479.453125, "completions/mean_terminated_length": 481.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.2016, "grad_norm": 0.004866011440753937, "learning_rate": 3.055555555555556e-07, "loss": 0.0141, "num_tokens": 42353419.0, "reward": 1.0703125, "reward_std": 0.19307208061218262, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.9921875, "step": 189 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5403007227671658, "calib/avg_num_step_conf": 5.1796875, "calib/ece": 0.2946640316205533, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3241106719367589, "calib/gap": 0.005454310789881234, "calib/mean_conf": 0.8835968379446639, "calib/mu_c": 0.8858389261744967, "calib/mu_w": 0.8803846153846154, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2946640316205533, "calib/std_conf": 0.042844828831381035, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7854931335830213, "calib/step_q_c_n": 801.0, "calib/step_q_gap": 0.01404551453540226, "calib/step_q_w": 0.771447619047619, "calib/step_q_w_n": 525.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 550.4921875, "completions/mean_terminated_length": 552.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.20266666666666666, "grad_norm": 0.005678132176399231, "learning_rate": 2.7777777777777776e-07, "loss": 0.024, "num_tokens": 42599953.0, "reward": 1.076171875, "reward_std": 0.2409372329711914, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.98828125, "step": 190 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4537071535022355, "calib/avg_num_step_conf": 5.7578125, "calib/ece": 0.36531496062992125, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.35826771653543305, "calib/gap": -0.007728514654744112, "calib/mean_conf": 0.8850000000000001, "calib/mu_c": 0.8812878787878788, "calib/mu_w": 0.889016393442623, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36531496062992125, "calib/std_conf": 0.04383891414828597, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7853507565337002, "calib/step_q_c_n": 727.0, "calib/step_q_gap": -0.01036544159213637, "calib/step_q_w": 0.7957161981258366, "calib/step_q_w_n": 747.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2096.0, "completions/max_terminated_length": 2096.0, "completions/mean_length": 509.91796875, "completions/mean_terminated_length": 511.91766357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.20373333333333332, "grad_norm": 0.005636806599795818, "learning_rate": 2.5000000000000004e-07, "loss": -0.0047, "num_tokens": 42834660.0, "reward": 1.01171875, "reward_std": 0.2590974271297455, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.9921875, "step": 191 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47105037301115726, "calib/avg_num_step_conf": 4.734375, "calib/ece": 0.2821825396825397, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3055555555555556, "calib/gap": -0.006280451574569024, "calib/mean_conf": 0.8830555555555557, "calib/mu_c": 0.8805882352941178, "calib/mu_w": 0.8868686868686868, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2790476190476191, "calib/std_conf": 0.04528337680835594, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7964315352697097, "calib/step_q_c_n": 723.0, "calib/step_q_gap": 0.05088961297932104, "calib/step_q_w": 0.7455419222903886, "calib/step_q_w_n": 489.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2667.0, "completions/max_terminated_length": 2667.0, "completions/mean_length": 519.4609375, "completions/mean_terminated_length": 521.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.2048, "grad_norm": 0.006023888476192951, "learning_rate": 2.2222222222222224e-07, "loss": 0.0452, "num_tokens": 43072618.0, "reward": 1.083984375, "reward_std": 0.2951584458351135, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.97265625, "step": 192 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4333597516676574, "calib/avg_num_step_conf": 5.1015625, "calib/ece": 0.29244, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.292, "calib/gap": -0.006681857208902908, "calib/mean_conf": 0.8804399999999999, "calib/mu_c": 0.877687074829932, "calib/mu_w": 0.8843689320388349, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.29244, "calib/std_conf": 0.04479516045288822, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8042016806722688, "calib/step_q_c_n": 714.0, "calib/step_q_gap": 0.004809788780376967, "calib/step_q_w": 0.7993918918918919, "calib/step_q_w_n": 592.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 509.51953125, "completions/mean_terminated_length": 513.531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.20586666666666667, "grad_norm": 0.005428059492260218, "learning_rate": 1.9444444444444447e-07, "loss": 0.0513, "num_tokens": 43308767.0, "reward": 1.060546875, "reward_std": 0.3096548914909363, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.97265625, "step": 193 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4667326732673268, "calib/avg_num_step_conf": 5.37109375, "calib/ece": 0.28350597609561756, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.30677290836653387, "calib/gap": -0.003932013201320195, "calib/mean_conf": 0.8811155378486056, "calib/mu_c": 0.8795333333333332, "calib/mu_w": 0.8834653465346534, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28350597609561756, "calib/std_conf": 0.04544983879206464, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.78445, "calib/step_q_c_n": 800.0, "calib/step_q_gap": 0.00851956521739139, "calib/step_q_w": 0.7759304347826086, "calib/step_q_w_n": 575.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2880.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 490.671875, "completions/mean_terminated_length": 494.5354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.20693333333333333, "grad_norm": 0.0054902262054383755, "learning_rate": 1.6666666666666668e-07, "loss": 0.0639, "num_tokens": 43540323.0, "reward": 1.07421875, "reward_std": 0.25600647926330566, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.9765625, "step": 194 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5666205168776371, "calib/avg_num_step_conf": 5.2109375, "calib/ece": 0.26287401574803143, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.37401574803149606, "calib/gap": 0.012432753164556831, "calib/mean_conf": 0.8849212598425196, "calib/mu_c": 0.889620253164557, "calib/mu_w": 0.8771875000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.26287401574803143, "calib/std_conf": 0.04395543668490655, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7980699638118215, "calib/step_q_c_n": 829.0, "calib/step_q_gap": 0.009218478663306606, "calib/step_q_w": 0.7888514851485149, "calib/step_q_w_n": 505.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 508.24609375, "completions/mean_terminated_length": 508.24609375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.208, "grad_norm": 0.005612097214907408, "learning_rate": 1.3888888888888888e-07, "loss": 0.015, "num_tokens": 43776418.0, "reward": 1.10546875, "reward_std": 0.22084440290927887, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.9765625, "step": 195 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49657142857142855, "calib/avg_num_step_conf": 4.94921875, "calib/ece": 0.2945882352941178, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.23921568627450981, "calib/gap": -0.0015619047619047643, "calib/mean_conf": 0.8771764705882353, "calib/mu_c": 0.8765333333333335, "calib/mu_w": 0.8780952380952383, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2917647058823531, "calib/std_conf": 0.041281770122004194, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8079945054945056, "calib/step_q_c_n": 728.0, "calib/step_q_gap": 0.004580776366490791, "calib/step_q_w": 0.8034137291280148, "calib/step_q_w_n": 539.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2710.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 435.8515625, "completions/mean_terminated_length": 435.8515625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.20906666666666668, "grad_norm": 0.005549365654587746, "learning_rate": 1.1111111111111112e-07, "loss": 0.0249, "num_tokens": 43990540.0, "reward": 1.080078125, "reward_std": 0.21700042486190796, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.98828125, "step": 196 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.510432, "calib/avg_num_step_conf": 5.94140625, "calib/ece": 0.3899200000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.352, "calib/gap": 0.0014399999999999968, "calib/mean_conf": 0.8881600000000001, "calib/mu_c": 0.88888, "calib/mu_w": 0.88744, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3890400000000001, "calib/std_conf": 0.042263629754198825, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8044175491679274, "calib/step_q_c_n": 661.0, "calib/step_q_gap": 0.03278964219118319, "calib/step_q_w": 0.7716279069767442, "calib/step_q_w_n": 860.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 549.078125, "completions/mean_terminated_length": 551.2313842773438, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.21013333333333334, "grad_norm": 0.005433402024209499, "learning_rate": 8.333333333333334e-08, "loss": 0.1145, "num_tokens": 44236160.0, "reward": 0.97265625, "reward_std": 0.26813995838165283, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.96875, "step": 197 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4489684074790457, "calib/avg_num_step_conf": 5.6328125, "calib/ece": 0.3200796812749005, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2908366533864542, "calib/gap": -0.007256608639587214, "calib/mean_conf": 0.8818326693227093, "calib/mu_c": 0.8786524822695035, "calib/mu_w": 0.8859090909090908, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3200796812749005, "calib/std_conf": 0.0451979696733806, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8025434243176177, "calib/step_q_c_n": 806.0, "calib/step_q_gap": 0.005530845701265497, "calib/step_q_w": 0.7970125786163522, "calib/step_q_w_n": 636.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 488.9453125, "completions/mean_terminated_length": 490.8627624511719, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.2112, "grad_norm": 0.005839452147483826, "learning_rate": 5.555555555555556e-08, "loss": 0.0501, "num_tokens": 44466714.0, "reward": 1.041015625, "reward_std": 0.25976496934890747, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.98046875, "step": 198 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49778464017185825, "calib/avg_num_step_conf": 5.3671875, "calib/ece": 0.27671999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.344, "calib/gap": -0.0021388292158970312, "calib/mean_conf": 0.8847200000000001, "calib/mu_c": 0.8838815789473684, "calib/mu_w": 0.8860204081632654, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.27671999999999997, "calib/std_conf": 0.050184874215245374, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8068908629441623, "calib/step_q_c_n": 788.0, "calib/step_q_gap": 0.04632772301242172, "calib/step_q_w": 0.7605631399317406, "calib/step_q_w_n": 586.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2923.0, "completions/max_terminated_length": 2923.0, "completions/mean_length": 576.2734375, "completions/mean_terminated_length": 576.2734375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.21226666666666666, "grad_norm": 0.005713994614779949, "learning_rate": 2.777777777777778e-08, "loss": 0.057, "num_tokens": 44718440.0, "reward": 1.076171875, "reward_std": 0.2899458706378937, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.96484375, "step": 199 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4710925039872408, "calib/avg_num_step_conf": 4.77734375, "calib/ece": 0.2786454183266933, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.30278884462151395, "calib/gap": -0.00536948431685258, "calib/mean_conf": 0.8842231075697211, "calib/mu_c": 0.8821052631578948, "calib/mu_w": 0.8874747474747474, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2786454183266933, "calib/std_conf": 0.04506407593797755, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8096626180836708, "calib/step_q_c_n": 741.0, "calib/step_q_gap": 0.04057548115421017, "calib/step_q_w": 0.7690871369294606, "calib/step_q_w_n": 482.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2292.0, "completions/max_terminated_length": 2292.0, "completions/mean_length": 498.34375, "completions/mean_terminated_length": 504.25299072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.21333333333333335, "grad_norm": 0.006134728901088238, "learning_rate": 0.0, "loss": -0.0127, "num_tokens": 44954064.0, "reward": 1.0859375, "reward_std": 0.2796323895454407, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.9765625, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.0328808896424016, "train_runtime": 8801.639, "train_samples_per_second": 5.817, "train_steps_per_second": 0.023 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44954064, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }