{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.11328125, "calib/avg_num_step_conf": 0.33984375, "calib/ece": 0.4849999999999999, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.7222222222222222, "calib/gap": -0.04555555555555568, "calib/mean_conf": 0.9349999999999999, "calib/mu_c": 0.9122222222222222, "calib/mu_w": 0.9577777777777778, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.45999999999999985, "calib/std_conf": 0.046338129248192805, "calib/step_conf_rate": 0.078125, "calib/step_q_c": 0.8305405405405405, "calib/step_q_c_n": 37.0, "calib/step_q_gap": 0.11922054054054032, "calib/step_q_w": 0.7113200000000002, "calib/step_q_w_n": 50.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 705.6796875, "completions/mean_terminated_length": 759.0504760742188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.10124761611223221, "learning_rate": 0.0, "loss": 0.006, "num_tokens": 312046.0, "reward": 0.068359375, "reward_std": 0.08782906830310822, "rewards/accuracy_reward_step": 0.0390625, "rewards/format_reward_step": 0.05859375, "step": 1 }, { "calib/answer_extract_rate": 0.1171875, "calib/avg_num_step_conf": 0.296875, "calib/ece": 0.8683333333333332, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.20176470588235307, "calib/mean_conf": 0.8905555555555554, "calib/mu_c": 0.7, "calib/mu_w": 0.901764705882353, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.8516666666666666, "calib/std_conf": 0.1136501562897797, "calib/step_conf_rate": 0.07421875, "calib/step_q_c": 0.2, "calib/step_q_c_n": 2.0, "calib/step_q_gap": -0.5893243243243242, "calib/step_q_w": 0.7893243243243243, "calib/step_q_w_n": 74.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 739.44921875, "completions/mean_terminated_length": 792.0460205078125, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0021333333333333334, "grad_norm": 0.1276310682296753, "learning_rate": 2.5000000000000004e-07, "loss": 0.0175, "num_tokens": 628441.0, "reward": 0.033203125, "reward_std": 0.056683022528886795, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.05859375, "step": 2 }, { "calib/answer_extract_rate": 0.05078125, "calib/avg_num_step_conf": 0.1015625, "calib/ece": 0.6842857142857142, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.020999999999999908, "calib/mean_conf": 0.9699999999999999, "calib/mu_c": 0.985, "calib/mu_w": 0.9640000000000001, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.6842857142857142, "calib/std_conf": 0.03464101615137754, "calib/step_conf_rate": 0.01953125, "calib/step_q_c": 0.8636363636363636, "calib/step_q_c_n": 11.0, "calib/step_q_gap": -0.01703030303030295, "calib/step_q_w": 0.8806666666666666, "calib/step_q_w_n": 15.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 658.28515625, "completions/mean_terminated_length": 739.127197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0032, "grad_norm": 0.06745757162570953, "learning_rate": 5.000000000000001e-07, "loss": 0.0039, "num_tokens": 926026.0, "reward": 0.017578125, "reward_std": 0.03477538749575615, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.01171875, "step": 3 }, { "calib/answer_extract_rate": 0.078125, "calib/avg_num_step_conf": 0.16015625, "calib/ece": 0.6170000000000001, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.7, "calib/gap": 0.08047619047619026, "calib/mean_conf": 0.9169999999999998, "calib/mu_c": 0.9733333333333333, "calib/mu_w": 0.892857142857143, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.6170000000000001, "calib/std_conf": 0.07975587752636164, "calib/step_conf_rate": 0.0390625, "calib/step_q_c": 0.8742857142857143, "calib/step_q_c_n": 7.0, "calib/step_q_gap": 0.11869747899159677, "calib/step_q_w": 0.7555882352941176, "calib/step_q_w_n": 34.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13671875, "completions/max_length": 2983.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 675.53515625, "completions/mean_terminated_length": 782.5203857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.004266666666666667, "grad_norm": 0.11923281848430634, "learning_rate": 7.5e-07, "loss": 0.0024, "num_tokens": 1228939.0, "reward": 0.0234375, "reward_std": 0.06247568503022194, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.0234375, "step": 4 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.1328125, "calib/ece": 0.576, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.018333333333333424, "calib/mean_conf": 0.976, "calib/mu_c": 0.965, "calib/mu_w": 0.9833333333333334, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.576, "calib/std_conf": 0.01019803902718558, "calib/step_conf_rate": 0.02734375, "calib/step_q_c": 0.857, "calib/step_q_c_n": 10.0, "calib/step_q_gap": 0.029916666666666702, "calib/step_q_w": 0.8270833333333333, "calib/step_q_w_n": 24.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2821.0, "completions/max_terminated_length": 2821.0, "completions/mean_length": 673.78125, "completions/mean_terminated_length": 749.9478149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.005333333333333333, "grad_norm": 0.08307817578315735, "learning_rate": 1.0000000000000002e-06, "loss": 0.0135, "num_tokens": 1531923.0, "reward": 0.015625, "reward_std": 0.04037860035896301, "rewards/accuracy_reward_step": 0.0078125, "rewards/format_reward_step": 0.015625, "step": 5 }, { "calib/answer_extract_rate": 0.109375, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.7416666666666665, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": 0.011071428571428399, "calib/mean_conf": 0.9638888888888888, "calib/mu_c": 0.9725, "calib/mu_w": 0.9614285714285716, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.7416666666666665, "calib/std_conf": 0.03093701287183523, "calib/step_conf_rate": 0.06640625, "calib/step_q_c": 0.8311764705882353, "calib/step_q_c_n": 17.0, "calib/step_q_gap": -0.02761663286004057, "calib/step_q_w": 0.8587931034482759, "calib/step_q_w_n": 58.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 675.99609375, "completions/mean_terminated_length": 736.4042358398438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.12601716816425323, "learning_rate": 1.25e-06, "loss": 0.0076, "num_tokens": 1834738.0, "reward": 0.037109375, "reward_std": 0.0820886641740799, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.04296875, "step": 6 }, { "calib/answer_extract_rate": 0.08984375, "calib/avg_num_step_conf": 0.2578125, "calib/ece": 0.5982352941176472, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8235294117647058, "calib/gap": -0.008030303030303054, "calib/mean_conf": 0.9335294117647059, "calib/mu_c": 0.9283333333333333, "calib/mu_w": 0.9363636363636364, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.5894117647058824, "calib/std_conf": 0.06944665125290214, "calib/step_conf_rate": 0.06640625, "calib/step_q_c": 0.854074074074074, "calib/step_q_c_n": 27.0, "calib/step_q_gap": 0.007407407407407196, "calib/step_q_w": 0.8466666666666668, "calib/step_q_w_n": 39.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 718.94140625, "completions/mean_terminated_length": 810.7885131835938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.14266818761825562, "learning_rate": 1.5e-06, "loss": 0.0091, "num_tokens": 2150019.0, "reward": 0.048828125, "reward_std": 0.11278069764375687, "rewards/accuracy_reward_step": 0.0234375, "rewards/format_reward_step": 0.05078125, "step": 7 }, { "calib/answer_extract_rate": 0.11328125, "calib/avg_num_step_conf": 0.25390625, "calib/ece": 0.6886666666666665, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.8666666666666667, "calib/gap": -0.00045454545454548523, "calib/mean_conf": 0.9553333333333334, "calib/mu_c": 0.955, "calib/mu_w": 0.9554545454545454, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6886666666666665, "calib/std_conf": 0.03480740661921763, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.855, "calib/step_q_c_n": 16.0, "calib/step_q_gap": 0.064795918367347, "calib/step_q_w": 0.790204081632653, "calib/step_q_w_n": 49.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 697.16796875, "completions/mean_terminated_length": 759.4680786132812, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.008533333333333334, "grad_norm": 0.13499559462070465, "learning_rate": 1.75e-06, "loss": 0.0125, "num_tokens": 2458814.0, "reward": 0.033203125, "reward_std": 0.08851936459541321, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.03515625, "step": 8 }, { "calib/answer_extract_rate": 0.07421875, "calib/avg_num_step_conf": 0.17578125, "calib/ece": 0.9137500000000001, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.625, "calib/mean_conf": 0.9137500000000001, "calib/mu_c": NaN, "calib/mu_w": 0.9137500000000001, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.9137500000000001, "calib/std_conf": 0.0823008961069076, "calib/step_conf_rate": 0.05078125, "calib/step_q_c": 0.9, "calib/step_q_c_n": 1.0, "calib/step_q_gap": 0.1890909090909092, "calib/step_q_w": 0.7109090909090908, "calib/step_q_w_n": 44.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 760.875, "completions/mean_terminated_length": 821.8733520507812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0096, "grad_norm": 0.0900043398141861, "learning_rate": 2.0000000000000003e-06, "loss": 0.0274, "num_tokens": 2784942.0, "reward": 0.01953125, "reward_std": 0.04761157184839249, "rewards/accuracy_reward_step": 0.00390625, "rewards/format_reward_step": 0.03125, "step": 9 }, { "calib/answer_extract_rate": 0.09765625, "calib/avg_num_step_conf": 0.3125, "calib/ece": 0.8388235294117645, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.8235294117647058, "calib/mean_conf": 0.8388235294117646, "calib/mu_c": NaN, "calib/mu_w": 0.8388235294117646, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.8388235294117645, "calib/std_conf": 0.27493062981609134, "calib/step_conf_rate": 0.08203125, "calib/step_q_w": 0.7589999999999999, "calib/step_q_w_n": 80.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 694.37890625, "completions/mean_terminated_length": 772.8739013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.13634684681892395, "learning_rate": 2.25e-06, "loss": 0.0209, "num_tokens": 3093311.0, "reward": 0.015625, "reward_std": 0.036563027650117874, "rewards/accuracy_reward_step": 0.0, "rewards/format_reward_step": 0.03125, "step": 10 }, { "calib/answer_extract_rate": 0.140625, "calib/avg_num_step_conf": 0.328125, "calib/ece": 0.8406666666666667, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.056428571428571384, "calib/mean_conf": 0.9073333333333333, "calib/mu_c": 0.96, "calib/mu_w": 0.9035714285714286, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.1640625, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.8406666666666667, "calib/std_conf": 0.1165313500975405, "calib/step_conf_rate": 0.078125, "calib/step_q_c": 0.8475, "calib/step_q_c_n": 8.0, "calib/step_q_gap": 0.07078947368421051, "calib/step_q_w": 0.7767105263157895, "calib/step_q_w_n": 76.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2891.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 671.57421875, "completions/mean_terminated_length": 734.7136840820312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.11572752147912979, "learning_rate": 2.5e-06, "loss": 0.0444, "num_tokens": 3393522.0, "reward": 0.033203125, "reward_std": 0.08088821172714233, "rewards/accuracy_reward_step": 0.01171875, "rewards/format_reward_step": 0.04296875, "step": 11 }, { "calib/answer_extract_rate": 0.140625, "calib/avg_num_step_conf": 0.4609375, "calib/ece": 0.6818750000000001, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": -0.037941176470588034, "calib/mean_conf": 0.936875, "calib/mu_c": 0.9100000000000001, "calib/mu_w": 0.9479411764705882, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.1796875, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.6635416666666668, "calib/std_conf": 0.0882918043101019, "calib/step_conf_rate": 0.1171875, "calib/step_q_c": 0.7531914893617022, "calib/step_q_c_n": 47.0, "calib/step_q_gap": -0.07110428528618495, "calib/step_q_w": 0.8242957746478872, "calib/step_q_w_n": 71.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 731.23828125, "completions/mean_terminated_length": 789.8607177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0128, "grad_norm": 0.21758244931697845, "learning_rate": 2.7500000000000004e-06, "loss": 0.0009, "num_tokens": 3708703.0, "reward": 0.0703125, "reward_std": 0.1610427349805832, "rewards/accuracy_reward_step": 0.03515625, "rewards/format_reward_step": 0.0703125, "step": 12 }, { "calib/answer_extract_rate": 0.17578125, "calib/avg_num_step_conf": 0.6015625, "calib/ece": 0.765, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.6923076923076923, "calib/gap": 0.03386363636363643, "calib/mean_conf": 0.9188461538461538, "calib/mu_c": 0.9475, "calib/mu_w": 0.9136363636363636, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.19921875, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.765, "calib/std_conf": 0.09608997878899045, "calib/step_conf_rate": 0.12109375, "calib/step_q_c": 0.8533333333333333, "calib/step_q_c_n": 18.0, "calib/step_q_gap": 0.06563039215686273, "calib/step_q_w": 0.7877029411764706, "calib/step_q_w_n": 136.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 633.6171875, "completions/mean_terminated_length": 708.3231811523438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.013866666666666666, "grad_norm": 0.15466050803661346, "learning_rate": 3e-06, "loss": 0.0635, "num_tokens": 3999309.0, "reward": 0.052734375, "reward_std": 0.13007746636867523, "rewards/accuracy_reward_step": 0.015625, "rewards/format_reward_step": 0.07421875, "step": 13 }, { "calib/answer_extract_rate": 0.2578125, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.7146153846153847, "calib/final_conf_rate": 0.203125, "calib/format_rate": 0.1875, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": -0.048248337028824695, "calib/mean_conf": 0.8907692307692308, "calib/mu_c": 0.8527272727272728, "calib/mu_w": 0.9009756097560975, "calib/nonempty_final_conf_rate": 0.203125, "calib/nonempty_reasoning_rate": 0.29296875, "calib/nonempty_step_conf_rate": 0.234375, "calib/pce": 0.696923076923077, "calib/std_conf": 0.20182671693469884, "calib/step_conf_rate": 0.234375, "calib/step_q_c": 0.7833962264150944, "calib/step_q_c_n": 53.0, "calib/step_q_gap": -0.018564557898630873, "calib/step_q_w": 0.8019607843137253, "calib/step_q_w_n": 204.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 588.078125, "completions/mean_terminated_length": 643.3675537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.014933333333333333, "grad_norm": 0.2625541388988495, "learning_rate": 3.2500000000000002e-06, "loss": 0.0223, "num_tokens": 4279065.0, "reward": 0.1484375, "reward_std": 0.19539156556129456, "rewards/accuracy_reward_step": 0.0546875, "rewards/format_reward_step": 0.1875, "step": 14 }, { "calib/answer_extract_rate": 0.33984375, "calib/avg_num_step_conf": 1.3515625, "calib/ece": 0.6461194029850745, "calib/final_conf_rate": 0.26171875, "calib/format_rate": 0.23046875, "calib/frac_conf_gt_0.9": 0.746268656716418, "calib/gap": 0.04512941176470597, "calib/mean_conf": 0.8998507462686566, "calib/mu_c": 0.9335294117647058, "calib/mu_w": 0.8883999999999999, "calib/nonempty_final_conf_rate": 0.26171875, "calib/nonempty_reasoning_rate": 0.37109375, "calib/nonempty_step_conf_rate": 0.296875, "calib/pce": 0.6461194029850745, "calib/std_conf": 0.16033074525048963, "calib/step_conf_rate": 0.296875, "calib/step_q_c": 0.8161428571428572, "calib/step_q_c_n": 70.0, "calib/step_q_gap": 0.07853416149068326, "calib/step_q_w": 0.7376086956521739, "calib/step_q_w_n": 276.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 566.6953125, "completions/mean_terminated_length": 601.9668579101562, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.016, "grad_norm": 0.31918662786483765, "learning_rate": 3.5e-06, "loss": 0.0979, "num_tokens": 4555827.0, "reward": 0.181640625, "reward_std": 0.31031230092048645, "rewards/accuracy_reward_step": 0.06640625, "rewards/format_reward_step": 0.23046875, "step": 15 }, { "calib/answer_extract_rate": 0.4296875, "calib/avg_num_step_conf": 1.87890625, "calib/ece": 0.6019709677419354, "calib/final_conf_rate": 0.36328125, "calib/format_rate": 0.3046875, "calib/frac_conf_gt_0.9": 0.7956989247311828, "calib/gap": 0.0006426229508195158, "calib/mean_conf": 0.9320784946236558, "calib/mu_c": 0.9325, "calib/mu_w": 0.9318573770491805, "calib/nonempty_final_conf_rate": 0.36328125, "calib/nonempty_reasoning_rate": 0.4921875, "calib/nonempty_step_conf_rate": 0.390625, "calib/pce": 0.5949817204301074, "calib/std_conf": 0.10571686252150013, "calib/step_conf_rate": 0.390625, "calib/step_q_c": 0.7988636363636363, "calib/step_q_c_n": 132.0, "calib/step_q_gap": 0.038531544673092055, "calib/step_q_w": 0.7603320916905443, "calib/step_q_w_n": 349.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 532.25, "completions/mean_terminated_length": 565.3776245117188, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.017066666666666667, "grad_norm": 0.3399321436882019, "learning_rate": 3.7500000000000005e-06, "loss": 0.1148, "num_tokens": 4824739.0, "reward": 0.28125, "reward_std": 0.3410623073577881, "rewards/accuracy_reward_step": 0.12890625, "rewards/format_reward_step": 0.3046875, "step": 16 }, { "calib/answer_extract_rate": 0.56640625, "calib/avg_num_step_conf": 2.45703125, "calib/ece": 0.6452459016393444, "calib/final_conf_rate": 0.4765625, "calib/format_rate": 0.4375, "calib/frac_conf_gt_0.9": 0.7213114754098361, "calib/gap": -0.03744832041343693, "calib/mean_conf": 0.9127868852459018, "calib/mu_c": 0.8863888888888889, "calib/mu_w": 0.9238372093023258, "calib/nonempty_final_conf_rate": 0.4765625, "calib/nonempty_reasoning_rate": 0.63671875, "calib/nonempty_step_conf_rate": 0.56640625, "calib/pce": 0.6314754098360658, "calib/std_conf": 0.1096511778851909, "calib/step_conf_rate": 0.56640625, "calib/step_q_c": 0.7571951219512195, "calib/step_q_c_n": 164.0, "calib/step_q_gap": -0.023579071597167545, "calib/step_q_w": 0.7807741935483871, "calib/step_q_w_n": 465.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 443.328125, "completions/mean_terminated_length": 459.4817810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.018133333333333335, "grad_norm": 0.40701761841773987, "learning_rate": 4.000000000000001e-06, "loss": 0.0584, "num_tokens": 5065567.0, "reward": 0.3828125, "reward_std": 0.4032036066055298, "rewards/accuracy_reward_step": 0.1640625, "rewards/format_reward_step": 0.4375, "step": 17 }, { "calib/answer_extract_rate": 0.63671875, "calib/avg_num_step_conf": 2.8515625, "calib/ece": 0.6883430656934306, "calib/final_conf_rate": 0.53515625, "calib/format_rate": 0.47265625, "calib/frac_conf_gt_0.9": 0.8467153284671532, "calib/gap": 0.0022530253025297986, "calib/mean_conf": 0.9511167883211679, "calib/mu_c": 0.9527777777777775, "calib/mu_w": 0.9505247524752477, "calib/nonempty_final_conf_rate": 0.53515625, "calib/nonempty_reasoning_rate": 0.71484375, "calib/nonempty_step_conf_rate": 0.63671875, "calib/pce": 0.6883430656934306, "calib/std_conf": 0.06206166767066995, "calib/step_conf_rate": 0.63671875, "calib/step_q_c": 0.787375, "calib/step_q_c_n": 160.0, "calib/step_q_gap": -0.01965078947368404, "calib/step_q_w": 0.8070257894736841, "calib/step_q_w_n": 570.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3052.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 494.27734375, "completions/mean_terminated_length": 504.12353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.0192, "grad_norm": 0.28891655802726746, "learning_rate": 4.25e-06, "loss": 0.0822, "num_tokens": 5326630.0, "reward": 0.400390625, "reward_std": 0.41276422142982483, "rewards/accuracy_reward_step": 0.1640625, "rewards/format_reward_step": 0.47265625, "step": 18 }, { "calib/answer_extract_rate": 0.91015625, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.7442465116279071, "calib/final_conf_rate": 0.83984375, "calib/format_rate": 0.80859375, "calib/frac_conf_gt_0.9": 0.7534883720930232, "calib/gap": 0.005089999999999928, "calib/mean_conf": 0.9221069767441861, "calib/mu_c": 0.9262499999999999, "calib/mu_w": 0.92116, "calib/nonempty_final_conf_rate": 0.83984375, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.7401534883720932, "calib/std_conf": 0.11782830511472514, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.7984269662921348, "calib/step_q_c_n": 178.0, "calib/step_q_gap": 0.009195450479703049, "calib/step_q_w": 0.7892315158124318, "calib/step_q_w_n": 917.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2930.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 351.328125, "completions/mean_terminated_length": 354.094482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.020266666666666665, "grad_norm": 0.33571699261665344, "learning_rate": 4.5e-06, "loss": 0.0736, "num_tokens": 5545138.0, "reward": 0.576171875, "reward_std": 0.34173643589019775, "rewards/accuracy_reward_step": 0.171875, "rewards/format_reward_step": 0.80859375, "step": 19 }, { "calib/answer_extract_rate": 0.921875, "calib/avg_num_step_conf": 3.984375, "calib/ece": 0.6350000000000002, "calib/final_conf_rate": 0.8828125, "calib/format_rate": 0.8359375, "calib/frac_conf_gt_0.9": 0.8230088495575221, "calib/gap": -0.021809177646524458, "calib/mean_conf": 0.9403097345132743, "calib/mu_c": 0.9253521126760562, "calib/mu_w": 0.9471612903225807, "calib/nonempty_final_conf_rate": 0.8828125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.6305752212389383, "calib/std_conf": 0.0730401247037796, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.7642567567567567, "calib/step_q_c_n": 296.0, "calib/step_q_gap": -0.03545180677915494, "calib/step_q_w": 0.7997085635359117, "calib/step_q_w_n": 724.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 331.35546875, "completions/mean_terminated_length": 332.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 37.0, "epoch": 0.021333333333333333, "grad_norm": 0.3089500963687897, "learning_rate": 4.75e-06, "loss": 0.1082, "num_tokens": 5758645.0, "reward": 0.7109375, "reward_std": 0.38752901554107666, "rewards/accuracy_reward_step": 0.29296875, "rewards/format_reward_step": 0.8359375, "step": 20 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 4.43359375, "calib/ece": 0.6724793388429753, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.743801652892562, "calib/gap": 0.02329861425595514, "calib/mean_conf": 0.9140495867768595, "calib/mu_c": 0.9314754098360656, "calib/mu_w": 0.9081767955801104, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.6672314049586777, "calib/std_conf": 0.14302850962638514, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7598765432098764, "calib/step_q_c_n": 243.0, "calib/step_q_gap": -0.023733322260975576, "calib/step_q_w": 0.783609865470852, "calib/step_q_w_n": 892.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2673.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 321.828125, "completions/mean_terminated_length": 323.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.0224, "grad_norm": 0.22014741599559784, "learning_rate": 5e-06, "loss": 0.0298, "num_tokens": 5967801.0, "reward": 0.70703125, "reward_std": 0.3434026837348938, "rewards/accuracy_reward_step": 0.24609375, "rewards/format_reward_step": 0.921875, "step": 21 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 4.41796875, "calib/ece": 0.6465617529880476, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8047808764940239, "calib/gap": 0.027354009542866042, "calib/mean_conf": 0.9318207171314741, "calib/mu_c": 0.9512191780821919, "calib/mu_w": 0.9238651685393259, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6437729083665338, "calib/std_conf": 0.11622469192487259, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7953690058479533, "calib/step_q_c_n": 342.0, "calib/step_q_gap": 0.016090171880906445, "calib/step_q_w": 0.7792788339670469, "calib/step_q_w_n": 789.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 292.609375, "completions/mean_terminated_length": 292.609375, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.023466666666666667, "grad_norm": 0.25837308168411255, "learning_rate": 4.9722222222222224e-06, "loss": 0.0051, "num_tokens": 6168333.0, "reward": 0.76953125, "reward_std": 0.37649795413017273, "rewards/accuracy_reward_step": 0.2890625, "rewards/format_reward_step": 0.9609375, "step": 22 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 4.2734375, "calib/ece": 0.6694779116465864, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.8433734939759037, "calib/gap": -0.0037214684756584626, "calib/mean_conf": 0.9473895582329317, "calib/mu_c": 0.9447142857142856, "calib/mu_w": 0.9484357541899441, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.6678714859437752, "calib/std_conf": 0.05626255193649903, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7870394736842105, "calib/step_q_c_n": 304.0, "calib/step_q_gap": 0.0010901065956029354, "calib/step_q_w": 0.7859493670886075, "calib/step_q_w_n": 790.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 302.87890625, "completions/mean_terminated_length": 304.0666809082031, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.024533333333333334, "grad_norm": 0.26379406452178955, "learning_rate": 4.944444444444445e-06, "loss": -0.0043, "num_tokens": 6373614.0, "reward": 0.74609375, "reward_std": 0.3697567582130432, "rewards/accuracy_reward_step": 0.27734375, "rewards/format_reward_step": 0.9375, "step": 23 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.546875, "calib/ece": 0.7166532258064517, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.7862903225806451, "calib/gap": 0.0024179970972422193, "calib/mean_conf": 0.9303629032258065, "calib/mu_c": 0.9322641509433962, "calib/mu_w": 0.929846153846154, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.7166532258064517, "calib/std_conf": 0.10919181366850009, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8040654205607476, "calib/step_q_c_n": 214.0, "calib/step_q_gap": 0.00751805213969492, "calib/step_q_w": 0.7965473684210527, "calib/step_q_w_n": 950.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1059.0, "completions/max_terminated_length": 1059.0, "completions/mean_length": 286.80078125, "completions/mean_terminated_length": 286.80078125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.0256, "grad_norm": 0.256256639957428, "learning_rate": 4.9166666666666665e-06, "loss": 0.025, "num_tokens": 6575355.0, "reward": 0.685546875, "reward_std": 0.34957581758499146, "rewards/accuracy_reward_step": 0.20703125, "rewards/format_reward_step": 0.95703125, "step": 24 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 4.6953125, "calib/ece": 0.6879785809906291, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8433734939759037, "calib/gap": -0.0027747747747748353, "calib/mean_conf": 0.9353949129852746, "calib/mu_c": 0.9333333333333333, "calib/mu_w": 0.9361081081081082, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6831726907630522, "calib/std_conf": 0.11454053393592831, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.819756838905775, "calib/step_q_c_n": 329.0, "calib/step_q_gap": 0.02302946204437739, "calib/step_q_w": 0.7967273768613976, "calib/step_q_w_n": 873.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 871.0, "completions/max_terminated_length": 871.0, "completions/mean_length": 288.19140625, "completions/mean_terminated_length": 289.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.02666666666666667, "grad_norm": 0.26059645414352417, "learning_rate": 4.888888888888889e-06, "loss": -0.0084, "num_tokens": 6776164.0, "reward": 0.73828125, "reward_std": 0.35654735565185547, "rewards/accuracy_reward_step": 0.2578125, "rewards/format_reward_step": 0.9609375, "step": 25 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 4.8671875, "calib/ece": 0.7299335989375831, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.8884462151394422, "calib/gap": 0.007348290598290563, "calib/mean_conf": 0.9530411686586986, "calib/mu_c": 0.95875, "calib/mu_w": 0.9514017094017094, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.7299335989375831, "calib/std_conf": 0.08047558716130868, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8019241877256318, "calib/step_q_c_n": 277.0, "calib/step_q_gap": -0.005229303846435718, "calib/step_q_w": 0.8071534915720675, "calib/step_q_w_n": 969.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1940.0, "completions/max_terminated_length": 1940.0, "completions/mean_length": 332.44140625, "completions/mean_terminated_length": 332.44140625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.027733333333333332, "grad_norm": 0.19963575899600983, "learning_rate": 4.861111111111111e-06, "loss": 0.0327, "num_tokens": 6990317.0, "reward": 0.701171875, "reward_std": 0.2856733798980713, "rewards/accuracy_reward_step": 0.21875, "rewards/format_reward_step": 0.96484375, "step": 26 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.76171875, "calib/ece": 0.7093019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9137254901960784, "calib/gap": 0.004200892857142646, "calib/mean_conf": 0.9563607843137255, "calib/mu_c": 0.9595238095238094, "calib/mu_w": 0.9553229166666668, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.7093019607843137, "calib/std_conf": 0.04585770009723973, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7931908424908425, "calib/step_q_c_n": 273.0, "calib/step_q_gap": -0.01610196934848085, "calib/step_q_w": 0.8092928118393233, "calib/step_q_w_n": 946.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 677.0, "completions/max_terminated_length": 677.0, "completions/mean_length": 297.78125, "completions/mean_terminated_length": 298.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.0288, "grad_norm": 0.255687952041626, "learning_rate": 4.833333333333333e-06, "loss": 0.0145, "num_tokens": 7195573.0, "reward": 0.734375, "reward_std": 0.3340994119644165, "rewards/accuracy_reward_step": 0.24609375, "rewards/format_reward_step": 0.9765625, "step": 27 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 5.0390625, "calib/ece": 0.5988373015873018, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8690476190476191, "calib/gap": 0.004648148148148401, "calib/mean_conf": 0.9520119047619048, "calib/mu_c": 0.9550000000000002, "calib/mu_w": 0.9503518518518518, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.5968531746031748, "calib/std_conf": 0.08081894727864886, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8013708920187794, "calib/step_q_c_n": 426.0, "calib/step_q_gap": -0.028470774647887276, "calib/step_q_w": 0.8298416666666667, "calib/step_q_w_n": 864.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1098.0, "completions/max_terminated_length": 1098.0, "completions/mean_length": 325.05859375, "completions/mean_terminated_length": 326.3333435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.029866666666666666, "grad_norm": 0.22692817449569702, "learning_rate": 4.805555555555556e-06, "loss": 0.0005, "num_tokens": 7409540.0, "reward": 0.83984375, "reward_std": 0.3686498999595642, "rewards/accuracy_reward_step": 0.3515625, "rewards/format_reward_step": 0.9765625, "step": 28 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 5.4765625, "calib/ece": 0.7113740157480315, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.905511811023622, "calib/gap": -0.004400149588631441, "calib/mean_conf": 0.9575944881889763, "calib/mu_c": 0.9542857142857143, "calib/mu_w": 0.9586858638743457, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.7104685039370079, "calib/std_conf": 0.044223449808071566, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8052698412698414, "calib/step_q_c_n": 315.0, "calib/step_q_gap": 0.00023488266818538772, "calib/step_q_w": 0.805034958601656, "calib/step_q_w_n": 1087.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1968.0, "completions/max_terminated_length": 1968.0, "completions/mean_length": 364.38671875, "completions/mean_terminated_length": 364.38671875, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.030933333333333334, "grad_norm": 0.17971405386924744, "learning_rate": 4.777777777777778e-06, "loss": 0.0278, "num_tokens": 7633759.0, "reward": 0.734375, "reward_std": 0.31729739904403687, "rewards/accuracy_reward_step": 0.24609375, "rewards/format_reward_step": 0.9765625, "step": 29 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 5.78515625, "calib/ece": 0.6281944444444444, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9047619047619048, "calib/gap": 0.0041780138304698955, "calib/mean_conf": 0.9575595238095238, "calib/mu_c": 0.9603614457831327, "calib/mu_w": 0.9561834319526628, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6281944444444444, "calib/std_conf": 0.044819926617470765, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7859325842696628, "calib/step_q_c_n": 445.0, "calib/step_q_gap": -0.007205157043078447, "calib/step_q_w": 0.7931377413127413, "calib/step_q_w_n": 1036.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1096.0, "completions/max_terminated_length": 1096.0, "completions/mean_length": 383.33984375, "completions/mean_terminated_length": 384.8431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.032, "grad_norm": 0.2327258139848709, "learning_rate": 4.75e-06, "loss": -0.0201, "num_tokens": 7862686.0, "reward": 0.806640625, "reward_std": 0.43618446588516235, "rewards/accuracy_reward_step": 0.32421875, "rewards/format_reward_step": 0.96484375, "step": 30 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.1171875, "calib/ece": 0.68082, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.908, "calib/gap": 0.01231669521246248, "calib/mean_conf": 0.9488199999999999, "calib/mu_c": 0.9578358208955225, "calib/mu_w": 0.9455191256830601, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.68082, "calib/std_conf": 0.09194078311609054, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8134969325153374, "calib/step_q_c_n": 326.0, "calib/step_q_gap": -0.0012135513556303934, "calib/step_q_w": 0.8147104838709678, "calib/step_q_w_n": 1240.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 975.0, "completions/max_terminated_length": 975.0, "completions/mean_length": 384.53515625, "completions/mean_terminated_length": 386.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.03306666666666667, "grad_norm": 0.19871972501277924, "learning_rate": 4.722222222222222e-06, "loss": 0.0345, "num_tokens": 8090847.0, "reward": 0.74609375, "reward_std": 0.2939828038215637, "rewards/accuracy_reward_step": 0.26171875, "rewards/format_reward_step": 0.96875, "step": 31 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 5.6171875, "calib/ece": 0.61597609561753, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8924302788844621, "calib/gap": 0.0003054571226080993, "calib/mean_conf": 0.9546215139442231, "calib/mu_c": 0.9548235294117648, "calib/mu_w": 0.9545180722891567, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.61597609561753, "calib/std_conf": 0.04114104801741104, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7840430107526881, "calib/step_q_c_n": 465.0, "calib/step_q_gap": 0.006000873034291421, "calib/step_q_w": 0.7780421377183967, "calib/step_q_w_n": 973.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1993.0, "completions/max_terminated_length": 1993.0, "completions/mean_length": 395.8515625, "completions/mean_terminated_length": 395.8515625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.034133333333333335, "grad_norm": 0.19935005903244019, "learning_rate": 4.694444444444445e-06, "loss": 0.065, "num_tokens": 8322697.0, "reward": 0.82421875, "reward_std": 0.32162073254585266, "rewards/accuracy_reward_step": 0.3359375, "rewards/format_reward_step": 0.9765625, "step": 32 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 6.15234375, "calib/ece": 0.6119277108433736, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9317269076305221, "calib/gap": 0.013934002869440354, "calib/mean_conf": 0.9532931726907631, "calib/mu_c": 0.9624705882352942, "calib/mu_w": 0.9485365853658538, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6119277108433736, "calib/std_conf": 0.09626560476211199, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7924902723735409, "calib/step_q_c_n": 514.0, "calib/step_q_gap": -0.004799360048702295, "calib/step_q_w": 0.7972896324222432, "calib/step_q_w_n": 1061.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2053.0, "completions/max_terminated_length": 2053.0, "completions/mean_length": 441.0703125, "completions/mean_terminated_length": 441.0703125, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.0352, "grad_norm": 0.17285677790641785, "learning_rate": 4.666666666666667e-06, "loss": 0.0726, "num_tokens": 8566291.0, "reward": 0.814453125, "reward_std": 0.29825982451438904, "rewards/accuracy_reward_step": 0.33203125, "rewards/format_reward_step": 0.96484375, "step": 33 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.6627509881422926, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.924901185770751, "calib/gap": -0.00461513108614231, "calib/mean_conf": 0.9591936758893281, "calib/mu_c": 0.9559466666666668, "calib/mu_w": 0.9605617977528091, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6627509881422926, "calib/std_conf": 0.03106359089630838, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.794635103926097, "calib/step_q_c_n": 433.0, "calib/step_q_gap": 0.00898895652241194, "calib/step_q_w": 0.7856461474036851, "calib/step_q_w_n": 1194.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2071.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 403.58984375, "completions/mean_terminated_length": 403.58984375, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 0.03626666666666667, "grad_norm": 0.20357801020145416, "learning_rate": 4.638888888888889e-06, "loss": 0.0515, "num_tokens": 8798530.0, "reward": 0.78125, "reward_std": 0.33316582441329956, "rewards/accuracy_reward_step": 0.29296875, "rewards/format_reward_step": 0.9765625, "step": 34 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 6.57421875, "calib/ece": 0.6264516129032259, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.9354838709677419, "calib/gap": 0.011807228915662549, "calib/mean_conf": 0.9570967741935484, "calib/mu_c": 0.965, "calib/mu_w": 0.9531927710843374, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6264516129032259, "calib/std_conf": 0.07159219404138058, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7909735349716447, "calib/step_q_c_n": 529.0, "calib/step_q_gap": 0.006346151956046708, "calib/step_q_w": 0.784627383015598, "calib/step_q_w_n": 1154.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2484.0, "completions/max_terminated_length": 2484.0, "completions/mean_length": 503.55859375, "completions/mean_terminated_length": 505.5333557128906, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.037333333333333336, "grad_norm": 0.21932625770568848, "learning_rate": 4.611111111111112e-06, "loss": 0.0716, "num_tokens": 9060505.0, "reward": 0.798828125, "reward_std": 0.366168349981308, "rewards/accuracy_reward_step": 0.3203125, "rewards/format_reward_step": 0.95703125, "step": 35 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.2265625, "calib/ece": 0.4248616600790514, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8972332015810277, "calib/gap": -0.004706842435655889, "calib/mean_conf": 0.9554545454545454, "calib/mu_c": 0.9532592592592593, "calib/mu_w": 0.9579661016949151, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4233596837944664, "calib/std_conf": 0.03562389876379772, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7755802469135803, "calib/step_q_c_n": 810.0, "calib/step_q_gap": -0.006090671453766516, "calib/step_q_w": 0.7816709183673468, "calib/step_q_w_n": 784.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2079.0, "completions/max_terminated_length": 2079.0, "completions/mean_length": 412.609375, "completions/mean_terminated_length": 412.609375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.0384, "grad_norm": 0.21121746301651, "learning_rate": 4.583333333333333e-06, "loss": 0.0364, "num_tokens": 9292653.0, "reward": 1.01953125, "reward_std": 0.3650910258293152, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.984375, "step": 36 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 5.96875, "calib/ece": 0.5700793650793651, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8809523809523809, "calib/gap": 0.0042899900232789, "calib/mean_conf": 0.9526190476190476, "calib/mu_c": 0.955257731958763, "calib/mu_w": 0.9509677419354841, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5688888888888889, "calib/std_conf": 0.04374671999366445, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7560966542750929, "calib/step_q_c_n": 538.0, "calib/step_q_gap": 0.016074432052870624, "calib/step_q_w": 0.7400222222222222, "calib/step_q_w_n": 990.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2471.0, "completions/max_terminated_length": 2471.0, "completions/mean_length": 427.5390625, "completions/mean_terminated_length": 427.5390625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.039466666666666664, "grad_norm": 0.2129954993724823, "learning_rate": 4.555555555555556e-06, "loss": 0.0473, "num_tokens": 9533007.0, "reward": 0.869140625, "reward_std": 0.3198983669281006, "rewards/accuracy_reward_step": 0.37890625, "rewards/format_reward_step": 0.98046875, "step": 37 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 5.9453125, "calib/ece": 0.5087698412698413, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8611111111111112, "calib/gap": 0.006982940387195491, "calib/mean_conf": 0.9492460317460316, "calib/mu_c": 0.9531531531531531, "calib/mu_w": 0.9461702127659576, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5087698412698413, "calib/std_conf": 0.06292338330694436, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7546644844517184, "calib/step_q_c_n": 611.0, "calib/step_q_gap": 0.006673266010445156, "calib/step_q_w": 0.7479912184412733, "calib/step_q_w_n": 911.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2220.0, "completions/max_terminated_length": 2220.0, "completions/mean_length": 427.37890625, "completions/mean_terminated_length": 430.74407958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.04053333333333333, "grad_norm": 0.20735085010528564, "learning_rate": 4.527777777777778e-06, "loss": 0.0576, "num_tokens": 9773112.0, "reward": 0.919921875, "reward_std": 0.3206837773323059, "rewards/accuracy_reward_step": 0.43359375, "rewards/format_reward_step": 0.97265625, "step": 38 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.85546875, "calib/ece": 0.5436758893280633, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.8893280632411067, "calib/gap": 0.021607843137254834, "calib/mean_conf": 0.9389328063241107, "calib/mu_c": 0.9520000000000001, "calib/mu_w": 0.9303921568627452, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5436758893280633, "calib/std_conf": 0.12431332112497273, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7363853211009175, "calib/step_q_c_n": 545.0, "calib/step_q_gap": 0.03206666281999504, "calib/step_q_w": 0.7043186582809224, "calib/step_q_w_n": 954.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2379.0, "completions/max_terminated_length": 2379.0, "completions/mean_length": 444.26171875, "completions/mean_terminated_length": 444.26171875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.0416, "grad_norm": 0.18263089656829834, "learning_rate": 4.5e-06, "loss": 0.0384, "num_tokens": 10016739.0, "reward": 0.884765625, "reward_std": 0.2555467486381531, "rewards/accuracy_reward_step": 0.390625, "rewards/format_reward_step": 0.98828125, "step": 39 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.55859375, "calib/ece": 0.5652283464566932, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9094488188976378, "calib/gap": -0.0035459107201043505, "calib/mean_conf": 0.954992125984252, "calib/mu_c": 0.9528282828282827, "calib/mu_w": 0.956374193548387, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5652283464566932, "calib/std_conf": 0.028724456934620224, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7275092936802975, "calib/step_q_c_n": 538.0, "calib/step_q_gap": -0.03869409615021091, "calib/step_q_w": 0.7662033898305084, "calib/step_q_w_n": 885.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2410.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 449.60546875, "completions/mean_terminated_length": 449.60546875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.042666666666666665, "grad_norm": 0.19915395975112915, "learning_rate": 4.472222222222223e-06, "loss": 0.0304, "num_tokens": 10262406.0, "reward": 0.8828125, "reward_std": 0.34560567140579224, "rewards/accuracy_reward_step": 0.390625, "rewards/format_reward_step": 0.984375, "step": 40 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.28125, "calib/ece": 0.3246456692913384, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.8228346456692913, "calib/gap": 0.008961849103683917, "calib/mean_conf": 0.942755905511811, "calib/mu_c": 0.9461783439490447, "calib/mu_w": 0.9372164948453607, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3246456692913384, "calib/std_conf": 0.040657531972191956, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7432474226804123, "calib/step_q_c_n": 776.0, "calib/step_q_gap": 0.03934117268041237, "calib/step_q_w": 0.70390625, "calib/step_q_w_n": 576.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1955.0, "completions/max_terminated_length": 1955.0, "completions/mean_length": 390.625, "completions/mean_terminated_length": 390.625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.04373333333333333, "grad_norm": 0.22646979987621307, "learning_rate": 4.444444444444444e-06, "loss": 0.0179, "num_tokens": 10493462.0, "reward": 1.107421875, "reward_std": 0.3214534521102905, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.98828125, "step": 41 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.15234375, "calib/ece": 0.4643700787401575, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8346456692913385, "calib/gap": 0.0007810849437642764, "calib/mean_conf": 0.940748031496063, "calib/mu_c": 0.9411570247933884, "calib/mu_w": 0.9403759398496241, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4643700787401575, "calib/std_conf": 0.041665988793130104, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6892431561996779, "calib/step_q_c_n": 621.0, "calib/step_q_gap": -0.0056120013934453095, "calib/step_q_w": 0.6948551575931232, "calib/step_q_w_n": 698.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1862.0, "completions/max_terminated_length": 1862.0, "completions/mean_length": 364.67578125, "completions/mean_terminated_length": 364.67578125, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.0448, "grad_norm": 0.2288183718919754, "learning_rate": 4.416666666666667e-06, "loss": 0.0256, "num_tokens": 10714995.0, "reward": 0.96875, "reward_std": 0.3018547296524048, "rewards/accuracy_reward_step": 0.47265625, "rewards/format_reward_step": 0.9921875, "step": 42 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.45517647058823535, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8392156862745098, "calib/gap": 0.012088771238611296, "calib/mean_conf": 0.9414509803921568, "calib/mu_c": 0.9476612903225807, "calib/mu_w": 0.9355725190839694, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.45517647058823535, "calib/std_conf": 0.05712604355265649, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7093910256410256, "calib/step_q_c_n": 624.0, "calib/step_q_gap": 0.01144540630567814, "calib/step_q_w": 0.6979456193353475, "calib/step_q_w_n": 662.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1429.0, "completions/max_terminated_length": 1429.0, "completions/mean_length": 407.31640625, "completions/mean_terminated_length": 408.91375732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.04586666666666667, "grad_norm": 0.2891763150691986, "learning_rate": 4.388888888888889e-06, "loss": 0.0006, "num_tokens": 10948300.0, "reward": 0.982421875, "reward_std": 0.3541671335697174, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.99609375, "step": 43 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.171875, "calib/ece": 0.5323320158102767, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8102766798418972, "calib/gap": 0.017405531749123737, "calib/mean_conf": 0.935494071146245, "calib/mu_c": 0.9458823529411766, "calib/mu_w": 0.9284768211920529, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5323320158102767, "calib/std_conf": 0.06607766543989756, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6990192307692307, "calib/step_q_c_n": 520.0, "calib/step_q_gap": -0.01525937619594342, "calib/step_q_w": 0.7142786069651741, "calib/step_q_w_n": 804.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1834.0, "completions/max_terminated_length": 1834.0, "completions/mean_length": 428.85546875, "completions/mean_terminated_length": 428.85546875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.046933333333333334, "grad_norm": 0.21532444655895233, "learning_rate": 4.361111111111112e-06, "loss": 0.014, "num_tokens": 11188215.0, "reward": 0.890625, "reward_std": 0.27451932430267334, "rewards/accuracy_reward_step": 0.3984375, "rewards/format_reward_step": 0.984375, "step": 44 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 4.734375, "calib/ece": 0.4757936507936507, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.7658730158730159, "calib/gap": 0.021655225019069246, "calib/mean_conf": 0.9218253968253968, "calib/mu_c": 0.9336842105263157, "calib/mu_w": 0.9120289855072464, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.47261904761904755, "calib/std_conf": 0.11485256797683835, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7284021543985638, "calib/step_q_c_n": 557.0, "calib/step_q_gap": 0.03238688722299132, "calib/step_q_w": 0.6960152671755725, "calib/step_q_w_n": 655.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2585.0, "completions/max_terminated_length": 2585.0, "completions/mean_length": 417.9921875, "completions/mean_terminated_length": 417.9921875, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.048, "grad_norm": 0.22201699018478394, "learning_rate": 4.333333333333334e-06, "loss": 0.0488, "num_tokens": 11424077.0, "reward": 0.9375, "reward_std": 0.2813979387283325, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.984375, "step": 45 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.99609375, "calib/ece": 0.49800796812748993, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.7768924302788844, "calib/gap": -0.005764388489208683, "calib/mean_conf": 0.9394422310756972, "calib/mu_c": 0.9362499999999999, "calib/mu_w": 0.9420143884892086, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.49561752988047797, "calib/std_conf": 0.04473569625394344, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7082641509433962, "calib/step_q_c_n": 530.0, "calib/step_q_gap": 0.020360279114290725, "calib/step_q_w": 0.6879038718291055, "calib/step_q_w_n": 749.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2897.0, "completions/max_terminated_length": 2897.0, "completions/mean_length": 442.06640625, "completions/mean_terminated_length": 442.06640625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.04906666666666667, "grad_norm": 0.22355523705482483, "learning_rate": 4.305555555555556e-06, "loss": 0.0527, "num_tokens": 11665822.0, "reward": 0.927734375, "reward_std": 0.2827729284763336, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.98046875, "step": 46 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.4428346456692914, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.7244094488188977, "calib/gap": 0.009217984496123988, "calib/mean_conf": 0.9325984251968504, "calib/mu_c": 0.93728, "calib/mu_w": 0.928062015503876, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4416535433070867, "calib/std_conf": 0.0665305873462583, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6843163097199341, "calib/step_q_c_n": 607.0, "calib/step_q_gap": 0.04003059543421972, "calib/step_q_w": 0.6442857142857144, "calib/step_q_w_n": 679.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 424.06640625, "completions/mean_terminated_length": 424.06640625, "completions/min_length": 198.0, "completions/min_terminated_length": 198.0, "epoch": 0.050133333333333335, "grad_norm": 0.2427399754524231, "learning_rate": 4.277777777777778e-06, "loss": -0.0224, "num_tokens": 11904167.0, "reward": 0.982421875, "reward_std": 0.30143460631370544, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.98828125, "step": 47 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.3828125, "calib/ece": 0.5288188976377952, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.7559055118110236, "calib/gap": 0.0007895582845753868, "calib/mean_conf": 0.9311811023622047, "calib/mu_c": 0.9316504854368934, "calib/mu_w": 0.930860927152318, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5272440944881889, "calib/std_conf": 0.05975822907426125, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6786977886977887, "calib/step_q_c_n": 407.0, "calib/step_q_gap": 0.0012012852012852626, "calib/step_q_w": 0.6774965034965035, "calib/step_q_w_n": 715.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1633.0, "completions/max_terminated_length": 1633.0, "completions/mean_length": 395.47265625, "completions/mean_terminated_length": 397.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.0512, "grad_norm": 0.22006112337112427, "learning_rate": 4.25e-06, "loss": -0.0051, "num_tokens": 12132904.0, "reward": 0.90234375, "reward_std": 0.2499421089887619, "rewards/accuracy_reward_step": 0.40625, "rewards/format_reward_step": 0.9921875, "step": 48 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 4.94921875, "calib/ece": 0.40646586345381525, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.7309236947791165, "calib/gap": -0.0025699300699302263, "calib/mean_conf": 0.9365863453815261, "calib/mu_c": 0.9353787878787879, "calib/mu_w": 0.9379487179487181, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40646586345381525, "calib/std_conf": 0.03338941428420325, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7109045226130652, "calib/step_q_c_n": 597.0, "calib/step_q_gap": 0.05996422410560254, "calib/step_q_w": 0.6509402985074627, "calib/step_q_w_n": 670.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 429.953125, "completions/mean_terminated_length": 435.0513916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.05226666666666667, "grad_norm": 0.1909162700176239, "learning_rate": 4.222222222222223e-06, "loss": -0.0018, "num_tokens": 12371316.0, "reward": 1.001953125, "reward_std": 0.24757641553878784, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.97265625, "step": 49 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.36492125984251966, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.7913385826771654, "calib/gap": -0.00724505327245073, "calib/mean_conf": 0.939724409448819, "calib/mu_c": 0.9366438356164384, "calib/mu_w": 0.9438888888888891, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.36492125984251966, "calib/std_conf": 0.035052306733343724, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7165669515669515, "calib/step_q_c_n": 702.0, "calib/step_q_gap": 0.0019094173203761944, "calib/step_q_w": 0.7146575342465753, "calib/step_q_w_n": 584.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 438.9140625, "completions/mean_terminated_length": 440.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.05333333333333334, "grad_norm": 0.2343200445175171, "learning_rate": 4.194444444444445e-06, "loss": -0.0039, "num_tokens": 12612846.0, "reward": 1.064453125, "reward_std": 0.2998589277267456, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.98828125, "step": 50 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.984375, "calib/ece": 0.3821343873517785, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.7430830039525692, "calib/gap": -0.0132604298356509, "calib/mean_conf": 0.935494071146245, "calib/mu_c": 0.9295714285714287, "calib/mu_w": 0.9428318584070796, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3821343873517785, "calib/std_conf": 0.03893154228400845, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6802950310559006, "calib/step_q_c_n": 644.0, "calib/step_q_gap": -0.01609737400739042, "calib/step_q_w": 0.696392405063291, "calib/step_q_w_n": 632.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2526.0, "completions/max_terminated_length": 2526.0, "completions/mean_length": 451.89453125, "completions/mean_terminated_length": 453.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.0544, "grad_norm": 0.18496379256248474, "learning_rate": 4.166666666666667e-06, "loss": 0.0231, "num_tokens": 12861635.0, "reward": 1.041015625, "reward_std": 0.210560142993927, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.98828125, "step": 51 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.37109375, "calib/ece": 0.2927559055118109, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6259842519685039, "calib/gap": 0.018605053191489085, "calib/mean_conf": 0.9226771653543308, "calib/mu_c": 0.9295625, "calib/mu_w": 0.9109574468085109, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2927559055118109, "calib/std_conf": 0.07465351797668675, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7196594005449591, "calib/step_q_c_n": 734.0, "calib/step_q_gap": 0.037399660285219016, "calib/step_q_w": 0.6822597402597401, "calib/step_q_w_n": 385.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1329.0, "completions/max_terminated_length": 1329.0, "completions/mean_length": 424.953125, "completions/mean_terminated_length": 426.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.055466666666666664, "grad_norm": 0.2344915121793747, "learning_rate": 4.138888888888889e-06, "loss": 0.0139, "num_tokens": 13102183.0, "reward": 1.125, "reward_std": 0.29510045051574707, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.9921875, "step": 52 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.37109375, "calib/ece": 0.34669291338582686, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6614173228346457, "calib/gap": 0.0040999490056095045, "calib/mean_conf": 0.9293700787401574, "calib/mu_c": 0.9310810810810811, "calib/mu_w": 0.9269811320754716, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.34669291338582686, "calib/std_conf": 0.037850681174162897, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.750516129032258, "calib/step_q_c_n": 620.0, "calib/step_q_gap": 0.0504359687116166, "calib/step_q_w": 0.7000801603206414, "calib/step_q_w_n": 499.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2408.0, "completions/max_terminated_length": 2408.0, "completions/mean_length": 460.73828125, "completions/mean_terminated_length": 460.73828125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.05653333333333333, "grad_norm": 0.19638237357139587, "learning_rate": 4.111111111111111e-06, "loss": 0.0294, "num_tokens": 13349764.0, "reward": 1.07421875, "reward_std": 0.2590813636779785, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9921875, "step": 53 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.08984375, "calib/ece": 0.20062745098039214, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6431372549019608, "calib/gap": -0.010999999999999788, "calib/mean_conf": 0.9258039215686273, "calib/mu_c": 0.9230000000000004, "calib/mu_w": 0.9340000000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.19066666666666665, "calib/std_conf": 0.07409612717985112, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7322883597883597, "calib/step_q_c_n": 756.0, "calib/step_q_gap": -0.009876588665248542, "calib/step_q_w": 0.7421649484536083, "calib/step_q_w_n": 291.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 415.76953125, "completions/mean_terminated_length": 415.76953125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.0576, "grad_norm": 0.21822695434093475, "learning_rate": 4.083333333333334e-06, "loss": 0.0361, "num_tokens": 13586241.0, "reward": 1.234375, "reward_std": 0.22762244939804077, "rewards/accuracy_reward_step": 0.7421875, "rewards/format_reward_step": 0.984375, "step": 54 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 3.60546875, "calib/ece": 0.4306692913385826, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.5787401574803149, "calib/gap": 0.021449329359165326, "calib/mean_conf": 0.9109842519685039, "calib/mu_c": 0.9221311475409836, "calib/mu_w": 0.9006818181818183, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.4306692913385826, "calib/std_conf": 0.09818939470388063, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7010098522167488, "calib/step_q_c_n": 406.0, "calib/step_q_gap": 0.010410239063944227, "calib/step_q_w": 0.6905996131528046, "calib/step_q_w_n": 517.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 425.453125, "completions/mean_terminated_length": 427.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.058666666666666666, "grad_norm": 0.24807067215442657, "learning_rate": 4.055555555555556e-06, "loss": 0.0025, "num_tokens": 13826789.0, "reward": 0.96484375, "reward_std": 0.27189987897872925, "rewards/accuracy_reward_step": 0.4765625, "rewards/format_reward_step": 0.9765625, "step": 55 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.3125, "calib/ece": 0.4680079681274901, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.6454183266932271, "calib/gap": -0.001918263090677108, "calib/mean_conf": 0.9279282868525895, "calib/mu_c": 0.9268965517241378, "calib/mu_w": 0.9288148148148149, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.4668924302788846, "calib/std_conf": 0.04074616346730289, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7365107212475633, "calib/step_q_c_n": 513.0, "calib/step_q_gap": 0.02109616964011818, "calib/step_q_w": 0.7154145516074452, "calib/step_q_w_n": 591.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 470.51171875, "completions/mean_terminated_length": 472.3569030761719, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.05973333333333333, "grad_norm": 0.24617721140384674, "learning_rate": 4.027777777777779e-06, "loss": 0.0402, "num_tokens": 14077888.0, "reward": 0.93359375, "reward_std": 0.3343261778354645, "rewards/accuracy_reward_step": 0.453125, "rewards/format_reward_step": 0.9609375, "step": 56 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 4.0859375, "calib/ece": 0.31776000000000015, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.592, "calib/gap": -0.014190386680988043, "calib/mean_conf": 0.92576, "calib/mu_c": 0.9201973684210527, "calib/mu_w": 0.9343877551020408, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.31776000000000015, "calib/std_conf": 0.03500317699866683, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7070627062706271, "calib/step_q_c_n": 606.0, "calib/step_q_gap": -0.025369111911191067, "calib/step_q_w": 0.7324318181818181, "calib/step_q_w_n": 440.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 465.53515625, "completions/mean_terminated_length": 467.3608093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.0608, "grad_norm": 0.18185299634933472, "learning_rate": 4.000000000000001e-06, "loss": 0.0489, "num_tokens": 14327665.0, "reward": 1.078125, "reward_std": 0.233200803399086, "rewards/accuracy_reward_step": 0.59375, "rewards/format_reward_step": 0.96875, "step": 57 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 4.80859375, "calib/ece": 0.3945564516129034, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.5645161290322581, "calib/gap": 0.0200873533246414, "calib/mean_conf": 0.91875, "calib/mu_c": 0.9283076923076924, "calib/mu_w": 0.908220338983051, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3945564516129034, "calib/std_conf": 0.07247114275529304, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7396908809891808, "calib/step_q_c_n": 647.0, "calib/step_q_gap": 0.04722512756452324, "calib/step_q_w": 0.6924657534246575, "calib/step_q_w_n": 584.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2566.0, "completions/max_terminated_length": 2566.0, "completions/mean_length": 498.38671875, "completions/mean_terminated_length": 504.29644775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.06186666666666667, "grad_norm": 0.22465580701828003, "learning_rate": 3.972222222222223e-06, "loss": 0.0199, "num_tokens": 14585380.0, "reward": 0.984375, "reward_std": 0.3242889642715454, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 0.953125, "step": 58 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.14453125, "calib/ece": 0.37674509803921574, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6352941176470588, "calib/gap": -0.003002986188876511, "calib/mean_conf": 0.9277254901960784, "calib/mu_c": 0.9263829787234042, "calib/mu_w": 0.9293859649122808, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.375764705882353, "calib/std_conf": 0.04088629783007876, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7276872964169381, "calib/step_q_c_n": 614.0, "calib/step_q_gap": 0.006479242725662915, "calib/step_q_w": 0.7212080536912752, "calib/step_q_w_n": 447.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1857.0, "completions/max_terminated_length": 1857.0, "completions/mean_length": 465.66796875, "completions/mean_terminated_length": 465.66796875, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.06293333333333333, "grad_norm": 0.2273540198802948, "learning_rate": 3.944444444444445e-06, "loss": 0.0299, "num_tokens": 14834647.0, "reward": 1.044921875, "reward_std": 0.32542872428894043, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.98828125, "step": 59 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.07421875, "calib/ece": 0.3562549800796812, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5617529880478087, "calib/gap": -0.00657536907536882, "calib/mean_conf": 0.9259760956175299, "calib/mu_c": 0.9231468531468535, "calib/mu_w": 0.9297222222222223, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3562549800796812, "calib/std_conf": 0.03610718089585278, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7030560271646858, "calib/step_q_c_n": 589.0, "calib/step_q_gap": 0.00028069676821007583, "calib/step_q_w": 0.7027753303964758, "calib/step_q_w_n": 454.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 479.28125, "completions/mean_terminated_length": 479.28125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.064, "grad_norm": 0.21452797949314117, "learning_rate": 3.916666666666667e-06, "loss": 0.0465, "num_tokens": 15090007.0, "reward": 1.048828125, "reward_std": 0.299457311630249, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.98046875, "step": 60 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.23046875, "calib/ece": 0.3366666666666667, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.5450980392156862, "calib/gap": 0.00040000000000006697, "calib/mean_conf": 0.9249019607843139, "calib/mu_c": 0.9250666666666668, "calib/mu_w": 0.9246666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3366666666666667, "calib/std_conf": 0.03846936897190413, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7230077519379845, "calib/step_q_c_n": 645.0, "calib/step_q_gap": 0.042984920887756406, "calib/step_q_w": 0.6800228310502281, "calib/step_q_w_n": 438.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1991.0, "completions/max_terminated_length": 1991.0, "completions/mean_length": 406.60546875, "completions/mean_terminated_length": 406.60546875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.06506666666666666, "grad_norm": 0.16886036098003387, "learning_rate": 3.88888888888889e-06, "loss": 0.0132, "num_tokens": 15321970.0, "reward": 1.08203125, "reward_std": 0.16696026921272278, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.9921875, "step": 61 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 4.37109375, "calib/ece": 0.4346000000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.552, "calib/gap": -0.007550732987644748, "calib/mean_conf": 0.9266, "calib/mu_c": 0.9227642276422765, "calib/mu_w": 0.9303149606299213, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4346000000000002, "calib/std_conf": 0.035755279330470895, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7012295081967213, "calib/step_q_c_n": 488.0, "calib/step_q_gap": -0.02986399418045771, "calib/step_q_w": 0.731093502377179, "calib/step_q_w_n": 631.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2762.0, "completions/max_terminated_length": 2762.0, "completions/mean_length": 484.98828125, "completions/mean_terminated_length": 486.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.06613333333333334, "grad_norm": 0.20834773778915405, "learning_rate": 3.861111111111112e-06, "loss": 0.0574, "num_tokens": 15577015.0, "reward": 0.970703125, "reward_std": 0.3153229355812073, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.97265625, "step": 62 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.4453125, "calib/ece": 0.3520634920634922, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6190476190476191, "calib/gap": -0.009914706642543014, "calib/mean_conf": 0.9294444444444444, "calib/mu_c": 0.9252739726027399, "calib/mu_w": 0.9351886792452829, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3510714285714287, "calib/std_conf": 0.03926518623524129, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.708941717791411, "calib/step_q_c_n": 652.0, "calib/step_q_gap": -0.0004204221262843477, "calib/step_q_w": 0.7093621399176954, "calib/step_q_w_n": 486.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2338.0, "completions/max_terminated_length": 2338.0, "completions/mean_length": 509.08984375, "completions/mean_terminated_length": 509.08984375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.0672, "grad_norm": 0.2037605196237564, "learning_rate": 3.833333333333334e-06, "loss": 0.0678, "num_tokens": 15839790.0, "reward": 1.0625, "reward_std": 0.32739830017089844, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.984375, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.06640625, "calib/ece": 0.3255731225296444, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4624505928853755, "calib/gap": -0.00740711974110031, "calib/mean_conf": 0.9164822134387353, "calib/mu_c": 0.9134666666666669, "calib/mu_w": 0.9208737864077672, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32458498023715426, "calib/std_conf": 0.04057731457775138, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6860690789473685, "calib/step_q_c_n": 608.0, "calib/step_q_gap": 0.0013346678619180485, "calib/step_q_w": 0.6847344110854504, "calib/step_q_w_n": 433.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 413.8125, "completions/mean_terminated_length": 417.07086181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.06826666666666667, "grad_norm": 0.23701032996177673, "learning_rate": 3.8055555555555556e-06, "loss": -0.0187, "num_tokens": 16073310.0, "reward": 1.080078125, "reward_std": 0.30969738960266113, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.98828125, "step": 64 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 3.77734375, "calib/ece": 0.3484800000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.46, "calib/gap": 0.016298904538341374, "calib/mean_conf": 0.9164800000000001, "calib/mu_c": 0.9235211267605636, "calib/mu_w": 0.9072222222222223, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3484800000000001, "calib/std_conf": 0.04960251606521587, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.6870819112627986, "calib/step_q_c_n": 586.0, "calib/step_q_gap": 0.014850940134189639, "calib/step_q_w": 0.672230971128609, "calib/step_q_w_n": 381.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3055.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 395.453125, "completions/mean_terminated_length": 397.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.06933333333333333, "grad_norm": 0.1639893501996994, "learning_rate": 3.777777777777778e-06, "loss": 0.0455, "num_tokens": 16303378.0, "reward": 1.044921875, "reward_std": 0.1631406545639038, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.97265625, "step": 65 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 4.37109375, "calib/ece": 0.43380952380952376, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.5753968253968254, "calib/gap": -0.00822832661290318, "calib/mean_conf": 0.9258730158730158, "calib/mu_c": 0.9216935483870968, "calib/mu_w": 0.929921875, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.43380952380952376, "calib/std_conf": 0.039866241462731676, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.6984412955465586, "calib/step_q_c_n": 494.0, "calib/step_q_gap": -0.0016107044534413584, "calib/step_q_w": 0.700052, "calib/step_q_w_n": 625.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2367.0, "completions/max_terminated_length": 2367.0, "completions/mean_length": 479.359375, "completions/mean_terminated_length": 481.2392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.0704, "grad_norm": 0.19545036554336548, "learning_rate": 3.7500000000000005e-06, "loss": 0.0222, "num_tokens": 16556254.0, "reward": 0.97265625, "reward_std": 0.2654076814651489, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.9765625, "step": 66 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.171875, "calib/ece": 0.3474803149606299, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.547244094488189, "calib/gap": -0.0065306122448981485, "calib/mean_conf": 0.9262204724409449, "calib/mu_c": 0.923469387755102, "calib/mu_w": 0.9300000000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3474803149606299, "calib/std_conf": 0.03906241994225783, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.690810372771475, "calib/step_q_c_n": 617.0, "calib/step_q_gap": -0.01570847423517696, "calib/step_q_w": 0.7065188470066519, "calib/step_q_w_n": 451.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2231.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 451.296875, "completions/mean_terminated_length": 451.296875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.07146666666666666, "grad_norm": 0.1560167670249939, "learning_rate": 3.7222222222222225e-06, "loss": 0.0265, "num_tokens": 16800602.0, "reward": 1.0703125, "reward_std": 0.15099012851715088, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.9921875, "step": 67 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.17578125, "calib/ece": 0.3788627450980392, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4745098039215686, "calib/gap": -0.016532846715328398, "calib/mean_conf": 0.9161176470588235, "calib/mu_c": 0.9084671532846716, "calib/mu_w": 0.925, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3788627450980392, "calib/std_conf": 0.043496967241056575, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7010873440285204, "calib/step_q_c_n": 561.0, "calib/step_q_gap": 0.011736950327732809, "calib/step_q_w": 0.6893503937007875, "calib/step_q_w_n": 508.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2867.0, "completions/max_terminated_length": 2867.0, "completions/mean_length": 419.78125, "completions/mean_terminated_length": 419.78125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.07253333333333334, "grad_norm": 0.1673060655593872, "learning_rate": 3.694444444444445e-06, "loss": 0.0282, "num_tokens": 17035962.0, "reward": 1.03125, "reward_std": 0.15729427337646484, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 4.34765625, "calib/ece": 0.45079681274900396, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5896414342629482, "calib/gap": 0.0021867838044310473, "calib/mean_conf": 0.9249003984063744, "calib/mu_c": 0.9260504201680674, "calib/mu_w": 0.9238636363636363, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.45079681274900396, "calib/std_conf": 0.041967190275287995, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7208226452905812, "calib/step_q_c_n": 499.0, "calib/step_q_gap": 0.03166955082804035, "calib/step_q_w": 0.6891530944625408, "calib/step_q_w_n": 614.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2934.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 522.390625, "completions/mean_terminated_length": 524.4392700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.0736, "grad_norm": 0.21035178005695343, "learning_rate": 3.6666666666666666e-06, "loss": 0.0529, "num_tokens": 17297998.0, "reward": 0.955078125, "reward_std": 0.3176359534263611, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.98046875, "step": 69 }, { "calib/answer_extract_rate": 0.9453125, "calib/avg_num_step_conf": 4.58984375, "calib/ece": 0.44493775933609964, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.6058091286307054, "calib/gap": -0.016115936035290823, "calib/mean_conf": 0.9270954356846473, "calib/mu_c": 0.9188034188034188, "calib/mu_w": 0.9349193548387096, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.44327800829875524, "calib/std_conf": 0.04887815339783165, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.6921166306695464, "calib/step_q_c_n": 463.0, "calib/step_q_gap": -0.043557526633824284, "calib/step_q_w": 0.7356741573033707, "calib/step_q_w_n": 712.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2650.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 571.875, "completions/mean_terminated_length": 574.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.07466666666666667, "grad_norm": 0.21151414513587952, "learning_rate": 3.638888888888889e-06, "loss": 0.1262, "num_tokens": 17575198.0, "reward": 0.923828125, "reward_std": 0.35245469212532043, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.93359375, "step": 70 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.1484375, "calib/ece": 0.40937007874015746, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.7086614173228346, "calib/gap": -0.0026392779333955785, "calib/mean_conf": 0.9408661417322836, "calib/mu_c": 0.9396296296296297, "calib/mu_w": 0.9422689075630253, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40937007874015746, "calib/std_conf": 0.03520521664206253, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7219156804733727, "calib/step_q_c_n": 676.0, "calib/step_q_gap": -0.0016941326107394739, "calib/step_q_w": 0.7236098130841122, "calib/step_q_w_n": 642.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1966.0, "completions/max_terminated_length": 1966.0, "completions/mean_length": 498.20703125, "completions/mean_terminated_length": 500.16082763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.07573333333333333, "grad_norm": 0.1976437270641327, "learning_rate": 3.6111111111111115e-06, "loss": 0.0358, "num_tokens": 17830955.0, "reward": 1.021484375, "reward_std": 0.31583717465400696, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.98828125, "step": 71 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.44921875, "calib/ece": 0.4215139442231076, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.6772908366533864, "calib/gap": -0.011867454568560087, "calib/mean_conf": 0.9354581673306773, "calib/mu_c": 0.9296899224806202, "calib/mu_w": 0.9415573770491803, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4215139442231076, "calib/std_conf": 0.033353252991088246, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7225493716337522, "calib/step_q_c_n": 557.0, "calib/step_q_gap": 0.000298512527223016, "calib/step_q_w": 0.7222508591065292, "calib/step_q_w_n": 582.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2346.0, "completions/max_terminated_length": 2346.0, "completions/mean_length": 466.0625, "completions/mean_terminated_length": 467.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.0768, "grad_norm": 0.23634567856788635, "learning_rate": 3.5833333333333335e-06, "loss": 0.0312, "num_tokens": 18078483.0, "reward": 0.994140625, "reward_std": 0.2701350450515747, "rewards/accuracy_reward_step": 0.50390625, "rewards/format_reward_step": 0.98046875, "step": 72 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 4.57421875, "calib/ece": 0.3489682539682539, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": -0.004851762559457962, "calib/mean_conf": 0.9402380952380952, "calib/mu_c": 0.9382550335570471, "calib/mu_w": 0.943106796116505, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3489682539682539, "calib/std_conf": 0.031859420517908986, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7216890881913303, "calib/step_q_c_n": 669.0, "calib/step_q_gap": -0.010103740493928703, "calib/step_q_w": 0.731792828685259, "calib/step_q_w_n": 502.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2751.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 482.48046875, "completions/mean_terminated_length": 482.48046875, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.07786666666666667, "grad_norm": 0.21767885982990265, "learning_rate": 3.555555555555556e-06, "loss": 0.049, "num_tokens": 18332838.0, "reward": 1.072265625, "reward_std": 0.3525664806365967, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.98046875, "step": 73 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.40681632653061217, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.710204081632653, "calib/gap": -0.00812374581939801, "calib/mean_conf": 0.9374285714285714, "calib/mu_c": 0.9336153846153846, "calib/mu_w": 0.9417391304347826, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.40681632653061217, "calib/std_conf": 0.034409301068170486, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7151140939597316, "calib/step_q_c_n": 596.0, "calib/step_q_gap": -0.023107059886422254, "calib/step_q_w": 0.7382211538461538, "calib/step_q_w_n": 624.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2698.0, "completions/max_terminated_length": 2698.0, "completions/mean_length": 518.82421875, "completions/mean_terminated_length": 518.82421875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.07893333333333333, "grad_norm": 0.19470734894275665, "learning_rate": 3.5277777777777784e-06, "loss": 0.0318, "num_tokens": 18593393.0, "reward": 0.98828125, "reward_std": 0.26092615723609924, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.953125, "step": 74 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.109375, "calib/ece": 0.265, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.7559055118110236, "calib/gap": -0.011919540229885284, "calib/mean_conf": 0.9378346456692913, "calib/mu_c": 0.9340804597701148, "calib/mu_w": 0.9460000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2588976377952756, "calib/std_conf": 0.037799150884002854, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7347410817031069, "calib/step_q_c_n": 869.0, "calib/step_q_gap": -0.017901287317394154, "calib/step_q_w": 0.7526423690205011, "calib/step_q_w_n": 439.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 478.08203125, "completions/mean_terminated_length": 478.08203125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.08, "grad_norm": 0.18703636527061462, "learning_rate": 3.5e-06, "loss": 0.0082, "num_tokens": 18844342.0, "reward": 1.17578125, "reward_std": 0.20858918130397797, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.9921875, "step": 75 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.3125, "calib/ece": 0.30292490118577065, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.6798418972332015, "calib/gap": -0.0008457112270842115, "calib/mean_conf": 0.9313833992094861, "calib/mu_c": 0.9310691823899372, "calib/mu_w": 0.9319148936170214, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30292490118577065, "calib/std_conf": 0.038066709194776505, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7095245398773007, "calib/step_q_c_n": 652.0, "calib/step_q_gap": 0.014502415983495442, "calib/step_q_w": 0.6950221238938052, "calib/step_q_w_n": 452.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2105.0, "completions/max_terminated_length": 2105.0, "completions/mean_length": 468.44921875, "completions/mean_terminated_length": 468.44921875, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 0.08106666666666666, "grad_norm": 0.22163182497024536, "learning_rate": 3.4722222222222224e-06, "loss": 0.048, "num_tokens": 19091129.0, "reward": 1.111328125, "reward_std": 0.2719145119190216, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.98046875, "step": 76 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 5.0625, "calib/ece": 0.3633596837944664, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.6996047430830039, "calib/gap": -0.0018869731800768008, "calib/mean_conf": 0.9325296442687747, "calib/mu_c": 0.9317241379310346, "calib/mu_w": 0.9336111111111114, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36138339920948614, "calib/std_conf": 0.04607868453686756, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7270705725699067, "calib/step_q_c_n": 751.0, "calib/step_q_gap": -0.0004707118337630156, "calib/step_q_w": 0.7275412844036697, "calib/step_q_w_n": 545.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1839.0, "completions/max_terminated_length": 1839.0, "completions/mean_length": 476.38671875, "completions/mean_terminated_length": 478.25494384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.08213333333333334, "grad_norm": 0.21457961201667786, "learning_rate": 3.444444444444445e-06, "loss": 0.021, "num_tokens": 19341556.0, "reward": 1.056640625, "reward_std": 0.28331270813941956, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.98046875, "step": 77 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.3671875, "calib/ece": 0.3709523809523809, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.7936507936507936, "calib/gap": -4.629629629648857e-05, "calib/mean_conf": 0.9423809523809523, "calib/mu_c": 0.942361111111111, "calib/mu_w": 0.9424074074074075, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3709523809523809, "calib/std_conf": 0.030156432661026095, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7423627075351213, "calib/step_q_c_n": 783.0, "calib/step_q_gap": 0.0015843657415511858, "calib/step_q_w": 0.7407783417935702, "calib/step_q_w_n": 591.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2333.0, "completions/max_terminated_length": 2333.0, "completions/mean_length": 538.58203125, "completions/mean_terminated_length": 540.6941528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.0832, "grad_norm": 0.20668213069438934, "learning_rate": 3.416666666666667e-06, "loss": 0.0296, "num_tokens": 19611265.0, "reward": 1.05859375, "reward_std": 0.28108295798301697, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.984375, "step": 78 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 5.41796875, "calib/ece": 0.3499218749999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.765625, "calib/gap": -0.004756859035004668, "calib/mean_conf": 0.9397656249999999, "calib/mu_c": 0.937814569536424, "calib/mu_w": 0.9425714285714286, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3499218749999999, "calib/std_conf": 0.0363890617955365, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7532355658198613, "calib/step_q_c_n": 866.0, "calib/step_q_gap": 0.029415988084736444, "calib/step_q_w": 0.7238195777351248, "calib/step_q_w_n": 521.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1599.0, "completions/max_terminated_length": 1599.0, "completions/mean_length": 517.94140625, "completions/mean_terminated_length": 519.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.08426666666666667, "grad_norm": 0.18805666267871857, "learning_rate": 3.3888888888888893e-06, "loss": 0.0299, "num_tokens": 19874042.0, "reward": 1.08984375, "reward_std": 0.25407272577285767, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 1.0, "step": 79 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.68359375, "calib/ece": 0.2543529411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.7372549019607844, "calib/gap": -0.004646658152405281, "calib/mean_conf": 0.9367058823529413, "calib/mu_c": 0.9352298850574714, "calib/mu_w": 0.9398765432098767, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2543529411764706, "calib/std_conf": 0.034210016947022547, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7527200791295746, "calib/step_q_c_n": 1011.0, "calib/step_q_gap": 0.007157016066511646, "calib/step_q_w": 0.745563063063063, "calib/step_q_w_n": 444.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 486.953125, "completions/mean_terminated_length": 486.953125, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.08533333333333333, "grad_norm": 0.20187772810459137, "learning_rate": 3.3611111111111117e-06, "loss": 0.0085, "num_tokens": 20124670.0, "reward": 1.177734375, "reward_std": 0.29367175698280334, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 0.99609375, "step": 80 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 5.328125, "calib/ece": 0.3355158730158729, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.6626984126984127, "calib/gap": -0.01723707664884122, "calib/mean_conf": 0.9311507936507937, "calib/mu_c": 0.9243790849673204, "calib/mu_w": 0.9416161616161616, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32976190476190465, "calib/std_conf": 0.03569971042973518, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7176338028169014, "calib/step_q_c_n": 710.0, "calib/step_q_gap": -0.04826833785588158, "calib/step_q_w": 0.765902140672783, "calib/step_q_w_n": 654.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 513.9453125, "completions/mean_terminated_length": 515.9608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.0864, "grad_norm": 0.19790305197238922, "learning_rate": 3.3333333333333333e-06, "loss": 0.0468, "num_tokens": 20386296.0, "reward": 1.0859375, "reward_std": 0.2682061493396759, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.9765625, "step": 81 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.8515625, "calib/ece": 0.31055555555555553, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6150793650793651, "calib/gap": 0.00011040904556058884, "calib/mean_conf": 0.9256349206349206, "calib/mu_c": 0.9256774193548388, "calib/mu_w": 0.9255670103092782, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31055555555555553, "calib/std_conf": 0.04877867136924964, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7049134199134199, "calib/step_q_c_n": 693.0, "calib/step_q_gap": -0.014102973529202978, "calib/step_q_w": 0.7190163934426229, "calib/step_q_w_n": 549.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 438.8359375, "completions/mean_terminated_length": 442.2913513183594, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.08746666666666666, "grad_norm": 0.23972539603710175, "learning_rate": 3.3055555555555558e-06, "loss": -0.0094, "num_tokens": 20627998.0, "reward": 1.09765625, "reward_std": 0.2942523658275604, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.984375, "step": 82 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.234375, "calib/ece": 0.34218253968253964, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.7420634920634921, "calib/gap": -0.008121568627451059, "calib/mean_conf": 0.937420634920635, "calib/mu_c": 0.9341333333333334, "calib/mu_w": 0.9422549019607844, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.34218253968253964, "calib/std_conf": 0.032429619318158576, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7267061923583662, "calib/step_q_c_n": 759.0, "calib/step_q_gap": -0.005428058932511637, "calib/step_q_w": 0.7321342512908778, "calib/step_q_w_n": 581.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2298.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 548.875, "completions/mean_terminated_length": 548.875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.08853333333333334, "grad_norm": 0.1901319921016693, "learning_rate": 3.277777777777778e-06, "loss": 0.0438, "num_tokens": 20899582.0, "reward": 1.080078125, "reward_std": 0.1934780478477478, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.98046875, "step": 83 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 4.2578125, "calib/ece": 0.3473725490196079, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.6392156862745098, "calib/gap": 0.00971844293272861, "calib/mean_conf": 0.9238431372549019, "calib/mu_c": 0.9279591836734694, "calib/mu_w": 0.9182407407407408, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3473725490196079, "calib/std_conf": 0.054500718667331985, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7106328124999999, "calib/step_q_c_n": 640.0, "calib/step_q_gap": 0.007455034722222087, "calib/step_q_w": 0.7031777777777778, "calib/step_q_w_n": 450.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2025.0, "completions/max_terminated_length": 2025.0, "completions/mean_length": 456.90625, "completions/mean_terminated_length": 456.90625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.0896, "grad_norm": 0.26204726099967957, "learning_rate": 3.2500000000000002e-06, "loss": 0.0192, "num_tokens": 21146278.0, "reward": 1.072265625, "reward_std": 0.25500500202178955, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.99609375, "step": 84 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.40574297188755, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.7269076305220884, "calib/gap": -0.0045426316470432715, "calib/mean_conf": 0.931847389558233, "calib/mu_c": 0.9296946564885497, "calib/mu_w": 0.9342372881355929, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.40574297188755, "calib/std_conf": 0.03905254419426512, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7091607142857143, "calib/step_q_c_n": 560.0, "calib/step_q_gap": -0.007326627486437642, "calib/step_q_w": 0.7164873417721519, "calib/step_q_w_n": 632.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 532.96875, "completions/mean_terminated_length": 537.1653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.09066666666666667, "grad_norm": 0.16692368686199188, "learning_rate": 3.2222222222222227e-06, "loss": 0.038, "num_tokens": 21414350.0, "reward": 0.998046875, "reward_std": 0.1843021810054779, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.97265625, "step": 85 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.397755905511811, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.6259842519685039, "calib/gap": 0.004126361655773514, "calib/mean_conf": 0.929251968503937, "calib/mu_c": 0.9311851851851853, "calib/mu_w": 0.9270588235294118, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.397755905511811, "calib/std_conf": 0.03826749376170533, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7281309904153355, "calib/step_q_c_n": 626.0, "calib/step_q_gap": 0.017033347317692238, "calib/step_q_w": 0.7110976430976432, "calib/step_q_w_n": 594.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1937.0, "completions/max_terminated_length": 1937.0, "completions/mean_length": 487.19921875, "completions/mean_terminated_length": 489.1098327636719, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.09173333333333333, "grad_norm": 0.22901059687137604, "learning_rate": 3.1944444444444443e-06, "loss": 0.0125, "num_tokens": 21668393.0, "reward": 1.015625, "reward_std": 0.24991528689861298, "rewards/accuracy_reward_step": 0.52734375, "rewards/format_reward_step": 0.9765625, "step": 86 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 4.125, "calib/ece": 0.1978571428571427, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6706349206349206, "calib/gap": -0.008534086325131418, "calib/mean_conf": 0.9288095238095239, "calib/mu_c": 0.9265405405405405, "calib/mu_w": 0.9350746268656719, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19626984126984112, "calib/std_conf": 0.04113676652449365, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7102567567567568, "calib/step_q_c_n": 740.0, "calib/step_q_gap": 0.016775744098529, "calib/step_q_w": 0.6934810126582278, "calib/step_q_w_n": 316.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2655.0, "completions/max_terminated_length": 2655.0, "completions/mean_length": 445.6796875, "completions/mean_terminated_length": 450.9644470214844, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.0928, "grad_norm": 0.20625683665275574, "learning_rate": 3.1666666666666667e-06, "loss": 0.0095, "num_tokens": 21911791.0, "reward": 1.21484375, "reward_std": 0.21918149292469025, "rewards/accuracy_reward_step": 0.72265625, "rewards/format_reward_step": 0.984375, "step": 87 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.35972549019607836, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.7137254901960784, "calib/gap": 0.0007678773407061579, "calib/mean_conf": 0.9322745098039215, "calib/mu_c": 0.9326027397260274, "calib/mu_w": 0.9318348623853212, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35972549019607836, "calib/std_conf": 0.03309408953294543, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7127668539325842, "calib/step_q_c_n": 712.0, "calib/step_q_gap": 0.04421088863974254, "calib/step_q_w": 0.6685559652928417, "calib/step_q_w_n": 461.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 485.171875, "completions/mean_terminated_length": 487.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.09386666666666667, "grad_norm": 0.2110147923231125, "learning_rate": 3.138888888888889e-06, "loss": -0.0216, "num_tokens": 22169651.0, "reward": 1.068359375, "reward_std": 0.27117839455604553, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.99609375, "step": 88 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.43505928853754944, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6798418972332015, "calib/gap": -0.004131874999999785, "calib/mean_conf": 0.9291304347826087, "calib/mu_c": 0.9270400000000001, "calib/mu_w": 0.9311718749999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43505928853754944, "calib/std_conf": 0.040030062598860244, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6985401459854015, "calib/step_q_c_n": 548.0, "calib/step_q_gap": 0.037421064206348276, "calib/step_q_w": 0.6611190817790532, "calib/step_q_w_n": 697.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2563.0, "completions/max_terminated_length": 2563.0, "completions/mean_length": 505.09765625, "completions/mean_terminated_length": 507.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.09493333333333333, "grad_norm": 0.19138966500759125, "learning_rate": 3.1111111111111116e-06, "loss": 0.0163, "num_tokens": 22431652.0, "reward": 0.98046875, "reward_std": 0.22767284512519836, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.984375, "step": 89 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 4.921875, "calib/ece": 0.3004724409448818, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6259842519685039, "calib/gap": -0.0036425024826215457, "calib/mean_conf": 0.9264566929133858, "calib/mu_c": 0.9250943396226418, "calib/mu_w": 0.9287368421052633, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3004724409448818, "calib/std_conf": 0.03864898688825327, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6829776021080369, "calib/step_q_c_n": 759.0, "calib/step_q_gap": -0.029058326035675708, "calib/step_q_w": 0.7120359281437126, "calib/step_q_w_n": 501.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2971.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 485.05859375, "completions/mean_terminated_length": 486.9608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.096, "grad_norm": 0.18919505178928375, "learning_rate": 3.0833333333333336e-06, "loss": 0.021, "num_tokens": 22682955.0, "reward": 1.1171875, "reward_std": 0.24751797318458557, "rewards/accuracy_reward_step": 0.62109375, "rewards/format_reward_step": 0.9921875, "step": 90 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.29296875, "calib/ece": 0.35011811023622047, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.65748031496063, "calib/gap": 0.0019028545997843427, "calib/mean_conf": 0.9288582677165355, "calib/mu_c": 0.9296598639455784, "calib/mu_w": 0.9277570093457941, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35011811023622047, "calib/std_conf": 0.03799963207643035, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7166071428571429, "calib/step_q_c_n": 728.0, "calib/step_q_gap": 0.009137711703501195, "calib/step_q_w": 0.7074694311536417, "calib/step_q_w_n": 627.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2022.0, "completions/max_terminated_length": 2022.0, "completions/mean_length": 513.4921875, "completions/mean_terminated_length": 515.5059204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.09706666666666666, "grad_norm": 0.18537934124469757, "learning_rate": 3.055555555555556e-06, "loss": -0.0098, "num_tokens": 22945929.0, "reward": 1.0703125, "reward_std": 0.254516065120697, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.9921875, "step": 91 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.83203125, "calib/ece": 0.32298039215686275, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.6196078431372549, "calib/gap": 0.002903433200462824, "calib/mean_conf": 0.9269019607843137, "calib/mu_c": 0.9280519480519481, "calib/mu_w": 0.9251485148514853, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32298039215686275, "calib/std_conf": 0.041171689731792756, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7056141304347826, "calib/step_q_c_n": 736.0, "calib/step_q_gap": 0.025239878937776572, "calib/step_q_w": 0.680374251497006, "calib/step_q_w_n": 501.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 480.91796875, "completions/mean_terminated_length": 480.91796875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.09813333333333334, "grad_norm": 0.28069862723350525, "learning_rate": 3.0277777777777776e-06, "loss": 0.0081, "num_tokens": 23199572.0, "reward": 1.099609375, "reward_std": 0.282889187335968, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.99609375, "step": 92 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 5.48046875, "calib/ece": 0.43621093749999984, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.67578125, "calib/gap": 0.005967032967033048, "calib/mean_conf": 0.9283984374999998, "calib/mu_c": 0.9314285714285715, "calib/mu_w": 0.9254615384615384, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.43621093749999984, "calib/std_conf": 0.04112876879458698, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.69, "calib/step_q_c_n": 653.0, "calib/step_q_gap": -0.03217333333333339, "calib/step_q_w": 0.7221733333333333, "calib/step_q_w_n": 750.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 509.3671875, "completions/mean_terminated_length": 511.36474609375, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.0992, "grad_norm": 0.2235431671142578, "learning_rate": 3e-06, "loss": 0.0208, "num_tokens": 23459554.0, "reward": 0.9921875, "reward_std": 0.29287609457969666, "rewards/accuracy_reward_step": 0.4921875, "rewards/format_reward_step": 1.0, "step": 93 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.96875, "calib/ece": 0.3397254901960783, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.7686274509803922, "calib/gap": 0.007563041263372372, "calib/mean_conf": 0.9318823529411765, "calib/mu_c": 0.9349668874172186, "calib/mu_w": 0.9274038461538462, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3397254901960783, "calib/std_conf": 0.047522810646241954, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7172486772486772, "calib/step_q_c_n": 756.0, "calib/step_q_gap": 0.11901224314015013, "calib/step_q_w": 0.5982364341085271, "calib/step_q_w_n": 516.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2091.0, "completions/max_terminated_length": 2091.0, "completions/mean_length": 462.046875, "completions/mean_terminated_length": 463.8588562011719, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.10026666666666667, "grad_norm": 0.18933187425136566, "learning_rate": 2.9722222222222225e-06, "loss": 0.0087, "num_tokens": 23710326.0, "reward": 1.087890625, "reward_std": 0.2208297997713089, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.99609375, "step": 94 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.28125, "calib/ece": 0.354584980237154, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6996047430830039, "calib/gap": 0.017085244648318065, "calib/mean_conf": 0.9174308300395257, "calib/mu_c": 0.9247916666666668, "calib/mu_w": 0.9077064220183487, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3514229249011856, "calib/std_conf": 0.09339088747697985, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6632329842931937, "calib/step_q_c_n": 764.0, "calib/step_q_gap": 0.07512073939523445, "calib/step_q_w": 0.5881122448979592, "calib/step_q_w_n": 588.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2246.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 486.41796875, "completions/mean_terminated_length": 488.3255310058594, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.10133333333333333, "grad_norm": 0.21531540155410767, "learning_rate": 2.944444444444445e-06, "loss": 0.0167, "num_tokens": 23964785.0, "reward": 1.056640625, "reward_std": 0.2692474126815796, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.98828125, "step": 95 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 4.953125, "calib/ece": 0.25441406250000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.73828125, "calib/gap": -0.003654331370900321, "calib/mean_conf": 0.9341015624999999, "calib/mu_c": 0.9329310344827585, "calib/mu_w": 0.9365853658536588, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25441406250000004, "calib/std_conf": 0.037634828418083584, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6484816132858838, "calib/step_q_c_n": 843.0, "calib/step_q_gap": -0.0004242690670573923, "calib/step_q_w": 0.6489058823529412, "calib/step_q_w_n": 425.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 445.0234375, "completions/mean_terminated_length": 446.7686462402344, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.1024, "grad_norm": 0.21615636348724365, "learning_rate": 2.916666666666667e-06, "loss": 0.0125, "num_tokens": 24208335.0, "reward": 1.1796875, "reward_std": 0.22514329850673676, "rewards/accuracy_reward_step": 0.6796875, "rewards/format_reward_step": 1.0, "step": 96 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 4.95703125, "calib/ece": 0.3588976377952755, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6338582677165354, "calib/gap": 0.022059875729618095, "calib/mean_conf": 0.914015748031496, "calib/mu_c": 0.9238297872340427, "calib/mu_w": 0.9017699115044246, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3588976377952755, "calib/std_conf": 0.09175678088628476, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6680576631259484, "calib/step_q_c_n": 659.0, "calib/step_q_gap": 0.00863143361775165, "calib/step_q_w": 0.6594262295081967, "calib/step_q_w_n": 610.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2596.0, "completions/max_terminated_length": 2596.0, "completions/mean_length": 471.91796875, "completions/mean_terminated_length": 471.91796875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.10346666666666667, "grad_norm": 0.19775576889514923, "learning_rate": 2.888888888888889e-06, "loss": 0.0527, "num_tokens": 24458026.0, "reward": 1.046875, "reward_std": 0.2200184315443039, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.9921875, "step": 97 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 4.921875, "calib/ece": 0.38138339920948616, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6047430830039525, "calib/gap": 0.034307036247334666, "calib/mean_conf": 0.9110276679841898, "calib/mu_c": 0.9271641791044777, "calib/mu_w": 0.892857142857143, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.38138339920948616, "calib/std_conf": 0.11173325106066419, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.646115569823435, "calib/step_q_c_n": 623.0, "calib/step_q_gap": 0.06539343481558568, "calib/step_q_w": 0.5807221350078493, "calib/step_q_w_n": 637.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 514.59375, "completions/mean_terminated_length": 514.59375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.10453333333333334, "grad_norm": 0.21482080221176147, "learning_rate": 2.861111111111111e-06, "loss": 0.0683, "num_tokens": 24719754.0, "reward": 1.015625, "reward_std": 0.30873024463653564, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.984375, "step": 98 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 5.4453125, "calib/ece": 0.5184313725490195, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.5568627450980392, "calib/gap": 0.004172560113154256, "calib/mean_conf": 0.9145098039215687, "calib/mu_c": 0.9170297029702972, "calib/mu_w": 0.9128571428571429, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5184313725490195, "calib/std_conf": 0.05420056166014588, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6449285714285714, "calib/step_q_c_n": 560.0, "calib/step_q_gap": 0.05164319972593345, "calib/step_q_w": 0.593285371702638, "calib/step_q_w_n": 834.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1263.0, "completions/max_terminated_length": 1263.0, "completions/mean_length": 526.65234375, "completions/mean_terminated_length": 528.7176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.1056, "grad_norm": 0.17127177119255066, "learning_rate": 2.8333333333333335e-06, "loss": -0.0171, "num_tokens": 24984185.0, "reward": 0.892578125, "reward_std": 0.20884855091571808, "rewards/accuracy_reward_step": 0.39453125, "rewards/format_reward_step": 0.99609375, "step": 99 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.08203125, "calib/ece": 0.3804365079365078, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.5317460317460317, "calib/gap": 0.014262108262108386, "calib/mean_conf": 0.9090079365079363, "calib/mu_c": 0.9156296296296298, "calib/mu_w": 0.9013675213675214, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3768650793650792, "calib/std_conf": 0.07703977492918072, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6183102493074792, "calib/step_q_c_n": 722.0, "calib/step_q_gap": 0.059225620637358256, "calib/step_q_w": 0.559084628670121, "calib/step_q_w_n": 579.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3019.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 535.65234375, "completions/mean_terminated_length": 535.65234375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.10666666666666667, "grad_norm": 0.21172572672367096, "learning_rate": 2.805555555555556e-06, "loss": 0.041, "num_tokens": 25252528.0, "reward": 1.0234375, "reward_std": 0.2752569019794464, "rewards/accuracy_reward_step": 0.53125, "rewards/format_reward_step": 0.984375, "step": 100 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.66015625, "calib/ece": 0.41351562500000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.49609375, "calib/gap": 0.03322748091603056, "calib/mean_conf": 0.9017968749999999, "calib/mu_c": 0.9188, "calib/mu_w": 0.8855725190839694, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41351562500000005, "calib/std_conf": 0.11340217145290639, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.549247572815534, "calib/step_q_c_n": 824.0, "calib/step_q_gap": -0.03996922627640698, "calib/step_q_w": 0.589216799091941, "calib/step_q_w_n": 881.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1932.0, "completions/max_terminated_length": 1932.0, "completions/mean_length": 546.21484375, "completions/mean_terminated_length": 548.3568725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.10773333333333333, "grad_norm": 0.1781286895275116, "learning_rate": 2.7777777777777783e-06, "loss": 0.0106, "num_tokens": 25523159.0, "reward": 0.98828125, "reward_std": 0.25395649671554565, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 1.0, "step": 101 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 5.74609375, "calib/ece": 0.3009486166007904, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.48221343873517786, "calib/gap": 0.017669552669552724, "calib/mean_conf": 0.9096442687747036, "calib/mu_c": 0.9165584415584418, "calib/mu_w": 0.8988888888888891, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3009486166007904, "calib/std_conf": 0.05771224735548806, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6087775061124695, "calib/step_q_c_n": 818.0, "calib/step_q_gap": -0.011360319308663658, "calib/step_q_w": 0.6201378254211332, "calib/step_q_w_n": 653.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2739.0, "completions/max_terminated_length": 2739.0, "completions/mean_length": 467.109375, "completions/mean_terminated_length": 470.78741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.1088, "grad_norm": 0.21871761977672577, "learning_rate": 2.7500000000000004e-06, "loss": 0.0158, "num_tokens": 25773243.0, "reward": 1.095703125, "reward_std": 0.2334584891796112, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 0.98828125, "step": 102 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.2265625, "calib/ece": 0.3555686274509805, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.4470588235294118, "calib/gap": 0.013400520962540297, "calib/mean_conf": 0.9006666666666667, "calib/mu_c": 0.9067625899280577, "calib/mu_w": 0.8933620689655174, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3555686274509805, "calib/std_conf": 0.0825948454610336, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6030913978494623, "calib/step_q_c_n": 744.0, "calib/step_q_gap": 0.0594561037318152, "calib/step_q_w": 0.5436352941176471, "calib/step_q_w_n": 850.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2392.0, "completions/max_terminated_length": 2392.0, "completions/mean_length": 568.30078125, "completions/mean_terminated_length": 570.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.10986666666666667, "grad_norm": 0.20121116936206818, "learning_rate": 2.7222222222222224e-06, "loss": -0.0099, "num_tokens": 26047088.0, "reward": 1.041015625, "reward_std": 0.25462427735328674, "rewards/accuracy_reward_step": 0.54296875, "rewards/format_reward_step": 0.99609375, "step": 103 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.4462450592885375, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4268774703557312, "calib/gap": 0.01959420289855074, "calib/mean_conf": 0.9007905138339921, "calib/mu_c": 0.9114782608695653, "calib/mu_w": 0.8918840579710146, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4462450592885375, "calib/std_conf": 0.07168888363264464, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6090327169274539, "calib/step_q_c_n": 703.0, "calib/step_q_gap": 0.06891366930840614, "calib/step_q_w": 0.5401190476190477, "calib/step_q_w_n": 924.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2812.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 527.26171875, "completions/mean_terminated_length": 529.3294677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.11093333333333333, "grad_norm": 0.1557941883802414, "learning_rate": 2.6944444444444444e-06, "loss": 0.0554, "num_tokens": 26312555.0, "reward": 0.943359375, "reward_std": 0.20018061995506287, "rewards/accuracy_reward_step": 0.44921875, "rewards/format_reward_step": 0.98828125, "step": 104 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.23046875, "calib/ece": 0.37460937499999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.39453125, "calib/gap": 0.043318670576735, "calib/mean_conf": 0.880078125, "calib/mu_c": 0.9010606060606062, "calib/mu_w": 0.8577419354838712, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36953125, "calib/std_conf": 0.14698796769288422, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6244238410596027, "calib/step_q_c_n": 755.0, "calib/step_q_gap": 0.0893762220119837, "calib/step_q_w": 0.535047619047619, "calib/step_q_w_n": 840.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1821.0, "completions/max_terminated_length": 1821.0, "completions/mean_length": 526.8203125, "completions/mean_terminated_length": 528.8862915039062, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.112, "grad_norm": 0.23184296488761902, "learning_rate": 2.666666666666667e-06, "loss": 0.012, "num_tokens": 26576989.0, "reward": 1.015625, "reward_std": 0.3553318381309509, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 1.0, "step": 105 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 5.6640625, "calib/ece": 0.33258823529411763, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.3254901960784314, "calib/gap": 0.03748251748251763, "calib/mean_conf": 0.8910196078431373, "calib/mu_c": 0.9074825174825177, "calib/mu_w": 0.8700000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33141176470588235, "calib/std_conf": 0.09624997890421533, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6440481927710843, "calib/step_q_c_n": 830.0, "calib/step_q_gap": 0.11122561212592297, "calib/step_q_w": 0.5328225806451613, "calib/step_q_w_n": 620.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2346.0, "completions/max_terminated_length": 2346.0, "completions/mean_length": 487.78125, "completions/mean_terminated_length": 487.78125, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 0.11306666666666666, "grad_norm": 0.1972045749425888, "learning_rate": 2.6388888888888893e-06, "loss": 0.0319, "num_tokens": 26830253.0, "reward": 1.056640625, "reward_std": 0.19517174363136292, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.99609375, "step": 106 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.48828125, "calib/ece": 0.30525691699604746, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.32806324110671936, "calib/gap": 0.0016199194909755077, "calib/mean_conf": 0.8957707509881423, "calib/mu_c": 0.8964238410596029, "calib/mu_w": 0.8948039215686274, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3020948616600791, "calib/std_conf": 0.06305072628611637, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5832730732635585, "calib/step_q_c_n": 1051.0, "calib/step_q_gap": 0.021404220804542073, "calib/step_q_w": 0.5618688524590164, "calib/step_q_w_n": 610.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2030.0, "completions/max_terminated_length": 2030.0, "completions/mean_length": 482.44140625, "completions/mean_terminated_length": 486.2401428222656, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.11413333333333334, "grad_norm": 0.21706554293632507, "learning_rate": 2.6111111111111113e-06, "loss": -0.0156, "num_tokens": 27082182.0, "reward": 1.083984375, "reward_std": 0.29333245754241943, "rewards/accuracy_reward_step": 0.58984375, "rewards/format_reward_step": 0.98828125, "step": 107 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.39453125, "calib/ece": 0.22215139442231088, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3426294820717131, "calib/gap": 0.012996824938663498, "calib/mean_conf": 0.8954581673306773, "calib/mu_c": 0.8997041420118344, "calib/mu_w": 0.8867073170731709, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22215139442231088, "calib/std_conf": 0.05985448192909104, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5330503731343285, "calib/step_q_c_n": 1072.0, "calib/step_q_gap": 0.02228931118742583, "calib/step_q_w": 0.5107610619469026, "calib/step_q_w_n": 565.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2643.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 528.4609375, "completions/mean_terminated_length": 534.727294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.1152, "grad_norm": 0.1876976191997528, "learning_rate": 2.5833333333333337e-06, "loss": 0.0132, "num_tokens": 27344508.0, "reward": 1.150390625, "reward_std": 0.2519606649875641, "rewards/accuracy_reward_step": 0.66015625, "rewards/format_reward_step": 0.98046875, "step": 108 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.30859375, "calib/ece": 0.4309881422924902, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.32806324110671936, "calib/gap": 0.007616957230653587, "calib/mean_conf": 0.9013438735177867, "calib/mu_c": 0.9053781512605045, "calib/mu_w": 0.8977611940298509, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4309881422924902, "calib/std_conf": 0.047271107881366875, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6075387384412153, "calib/step_q_c_n": 757.0, "calib/step_q_gap": 0.13707195208573963, "calib/step_q_w": 0.4704667863554757, "calib/step_q_w_n": 1114.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2763.0, "completions/max_terminated_length": 2763.0, "completions/mean_length": 563.3828125, "completions/mean_terminated_length": 565.5921630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.11626666666666667, "grad_norm": 0.1523021161556244, "learning_rate": 2.5555555555555557e-06, "loss": 0.0357, "num_tokens": 27617142.0, "reward": 0.958984375, "reward_std": 0.1532130092382431, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.98828125, "step": 109 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.15234375, "calib/ece": 0.45134387351778654, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3241106719367589, "calib/gap": -0.0027101449275359624, "calib/mean_conf": 0.8995652173913045, "calib/mu_c": 0.8980869565217394, "calib/mu_w": 0.9007971014492754, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4481818181818182, "calib/std_conf": 0.06555485811578877, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5879010494752623, "calib/step_q_c_n": 667.0, "calib/step_q_gap": 0.10748254727261913, "calib/step_q_w": 0.4804185022026432, "calib/step_q_w_n": 908.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2461.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 536.0859375, "completions/mean_terminated_length": 538.1882934570312, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.11733333333333333, "grad_norm": 0.19876490533351898, "learning_rate": 2.5277777777777778e-06, "loss": 0.0254, "num_tokens": 27883108.0, "reward": 0.943359375, "reward_std": 0.29334381222724915, "rewards/accuracy_reward_step": 0.44921875, "rewards/format_reward_step": 0.98828125, "step": 110 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.62890625, "calib/ece": 0.38494071146245074, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.33201581027667987, "calib/gap": 0.013279549718574035, "calib/mean_conf": 0.8987747035573124, "calib/mu_c": 0.9052307692307693, "calib/mu_w": 0.8919512195121952, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38494071146245074, "calib/std_conf": 0.0745611709527481, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5369295774647888, "calib/step_q_c_n": 710.0, "calib/step_q_gap": 0.11332268790045236, "calib/step_q_w": 0.4236068895643364, "calib/step_q_w_n": 987.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2428.0, "completions/max_terminated_length": 2428.0, "completions/mean_length": 533.6484375, "completions/mean_terminated_length": 535.7412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.1184, "grad_norm": 0.21412035822868347, "learning_rate": 2.5e-06, "loss": 0.0424, "num_tokens": 28150938.0, "reward": 1.001953125, "reward_std": 0.2785911560058594, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 0.98828125, "step": 111 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 6.67578125, "calib/ece": 0.35592000000000024, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.22, "calib/gap": 0.012882205513784384, "calib/mean_conf": 0.8735200000000001, "calib/mu_c": 0.8795488721804512, "calib/mu_w": 0.8666666666666668, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34872000000000025, "calib/std_conf": 0.12912788080039106, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4987410071942446, "calib/step_q_c_n": 834.0, "calib/step_q_gap": 0.12077529290853029, "calib/step_q_w": 0.3779657142857143, "calib/step_q_w_n": 875.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1860.0, "completions/max_terminated_length": 1860.0, "completions/mean_length": 554.765625, "completions/mean_terminated_length": 565.8167724609375, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.11946666666666667, "grad_norm": 0.2119046002626419, "learning_rate": 2.4722222222222226e-06, "loss": -0.0201, "num_tokens": 28424686.0, "reward": 1.0078125, "reward_std": 0.2706044018268585, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.9765625, "step": 112 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.3359375, "calib/ece": 0.25003937007874033, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.16141732283464566, "calib/gap": 0.018676865271068155, "calib/mean_conf": 0.8862598425196851, "calib/mu_c": 0.8930246913580248, "calib/mu_w": 0.8743478260869566, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24925196850393716, "calib/std_conf": 0.05934507556382405, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.4699704433497537, "calib/step_q_c_n": 1015.0, "calib/step_q_gap": 0.09557176130692008, "calib/step_q_w": 0.3743986820428336, "calib/step_q_w_n": 607.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2715.0, "completions/max_terminated_length": 2715.0, "completions/mean_length": 524.54296875, "completions/mean_terminated_length": 524.54296875, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 0.12053333333333334, "grad_norm": 0.21917535364627838, "learning_rate": 2.4444444444444447e-06, "loss": 0.0458, "num_tokens": 28687977.0, "reward": 1.126953125, "reward_std": 0.25684428215026855, "rewards/accuracy_reward_step": 0.6328125, "rewards/format_reward_step": 0.98828125, "step": 113 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.5234375, "calib/ece": 0.2729644268774706, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.16600790513833993, "calib/gap": 0.019191118160190257, "calib/mean_conf": 0.8895652173913045, "calib/mu_c": 0.896923076923077, "calib/mu_w": 0.8777319587628868, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2729644268774706, "calib/std_conf": 0.07450633773909882, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4834255319148936, "calib/step_q_c_n": 940.0, "calib/step_q_gap": 0.09075429903818133, "calib/step_q_w": 0.3926712328767123, "calib/step_q_w_n": 730.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 507.86328125, "completions/mean_terminated_length": 507.86328125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.1216, "grad_norm": 0.2042660415172577, "learning_rate": 2.4166666666666667e-06, "loss": 0.0274, "num_tokens": 28946822.0, "reward": 1.103515625, "reward_std": 0.26051056385040283, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.98828125, "step": 114 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.265625, "calib/ece": 0.3421176470588237, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.17647058823529413, "calib/gap": 0.016167701863354034, "calib/mean_conf": 0.8911372549019608, "calib/mu_c": 0.8984285714285716, "calib/mu_w": 0.8822608695652175, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3421176470588237, "calib/std_conf": 0.046827601885498976, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4858668197474168, "calib/step_q_c_n": 871.0, "calib/step_q_gap": 0.08058305439953145, "calib/step_q_w": 0.40528376534788535, "calib/step_q_w_n": 733.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2410.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 504.40625, "completions/mean_terminated_length": 504.40625, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.12266666666666666, "grad_norm": 0.19513730704784393, "learning_rate": 2.388888888888889e-06, "loss": 0.0299, "num_tokens": 29205022.0, "reward": 1.044921875, "reward_std": 0.2489628791809082, "rewards/accuracy_reward_step": 0.546875, "rewards/format_reward_step": 0.99609375, "step": 115 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.33984375, "calib/ece": 0.3352755905511812, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.18503937007874016, "calib/gap": -0.005189229900207093, "calib/mean_conf": 0.8903937007874017, "calib/mu_c": 0.888085106382979, "calib/mu_w": 0.8932743362831861, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3352755905511812, "calib/std_conf": 0.07678481615408075, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4639470782800441, "calib/step_q_c_n": 907.0, "calib/step_q_gap": 0.07233185194259556, "calib/step_q_w": 0.39161522633744855, "calib/step_q_w_n": 972.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 595.77734375, "completions/mean_terminated_length": 595.77734375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.12373333333333333, "grad_norm": 0.16184002161026, "learning_rate": 2.361111111111111e-06, "loss": 0.0391, "num_tokens": 29485869.0, "reward": 1.046875, "reward_std": 0.19438795745372772, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.9921875, "step": 116 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.7890625, "calib/ece": 0.42549019607843147, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.12941176470588237, "calib/gap": 0.0152452619843928, "calib/mean_conf": 0.8843137254901962, "calib/mu_c": 0.892564102564103, "calib/mu_w": 0.8773188405797102, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42549019607843147, "calib/std_conf": 0.08700811100258624, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4785615491009682, "calib/step_q_c_n": 723.0, "calib/step_q_gap": 0.05154677077584502, "calib/step_q_w": 0.4270147783251232, "calib/step_q_w_n": 1015.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2316.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 548.05859375, "completions/mean_terminated_length": 548.05859375, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.1248, "grad_norm": 0.2146761268377304, "learning_rate": 2.3333333333333336e-06, "loss": 0.0386, "num_tokens": 29756580.0, "reward": 0.953125, "reward_std": 0.2651212811470032, "rewards/accuracy_reward_step": 0.45703125, "rewards/format_reward_step": 0.9921875, "step": 117 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.3526771653543309, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.16535433070866143, "calib/gap": 0.006148175912043974, "calib/mean_conf": 0.8920472440944882, "calib/mu_c": 0.8948550724637683, "calib/mu_w": 0.8887068965517243, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35070866141732304, "calib/std_conf": 0.05002107560035077, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.40357984994640944, "calib/step_q_c_n": 933.0, "calib/step_q_gap": 0.008590116886861177, "calib/step_q_w": 0.39498973305954826, "calib/step_q_w_n": 974.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2829.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 560.59765625, "completions/mean_terminated_length": 562.7960815429688, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.12586666666666665, "grad_norm": 0.18381370604038239, "learning_rate": 2.305555555555556e-06, "loss": -0.0124, "num_tokens": 30027909.0, "reward": 1.033203125, "reward_std": 0.21692538261413574, "rewards/accuracy_reward_step": 0.5390625, "rewards/format_reward_step": 0.98828125, "step": 118 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.23046875, "calib/ece": 0.36031496062992147, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.12598425196850394, "calib/gap": 0.019348258706467503, "calib/mean_conf": 0.8878740157480315, "calib/mu_c": 0.8970149253731343, "calib/mu_w": 0.8776666666666668, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.36031496062992147, "calib/std_conf": 0.06190709220255889, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.47093023255813954, "calib/step_q_c_n": 860.0, "calib/step_q_gap": 0.13102104991434538, "calib/step_q_w": 0.33990918264379416, "calib/step_q_w_n": 991.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2648.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 598.7109375, "completions/mean_terminated_length": 603.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.12693333333333334, "grad_norm": 0.20627912878990173, "learning_rate": 2.277777777777778e-06, "loss": -0.0183, "num_tokens": 30310051.0, "reward": 1.013671875, "reward_std": 0.2583560347557068, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.98046875, "step": 119 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.734375, "calib/ece": 0.2530708661417323, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.19291338582677164, "calib/gap": -0.009013550135501092, "calib/mean_conf": 0.8892913385826774, "calib/mu_c": 0.8860975609756101, "calib/mu_w": 0.8951111111111112, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24834645669291341, "calib/std_conf": 0.07883683041885726, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.4628971962616822, "calib/step_q_c_n": 1070.0, "calib/step_q_gap": 0.0582748721026608, "calib/step_q_w": 0.4046223241590214, "calib/step_q_w_n": 654.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2038.0, "completions/max_terminated_length": 2038.0, "completions/mean_length": 549.375, "completions/mean_terminated_length": 551.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.128, "grad_norm": 0.21776804327964783, "learning_rate": 2.25e-06, "loss": -0.0059, "num_tokens": 30581187.0, "reward": 1.1328125, "reward_std": 0.24949431419372559, "rewards/accuracy_reward_step": 0.640625, "rewards/format_reward_step": 0.984375, "step": 120 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.37890625, "calib/ece": 0.3217968750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.1171875, "calib/gap": 0.020435865504358497, "calib/mean_conf": 0.8921093750000002, "calib/mu_c": 0.9008904109589043, "calib/mu_w": 0.8804545454545458, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3217968750000001, "calib/std_conf": 0.0668154214018693, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3749952785646837, "calib/step_q_c_n": 1059.0, "calib/step_q_gap": 0.01505166410685238, "calib/step_q_w": 0.3599436144578313, "calib/step_q_w_n": 830.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1310.0, "completions/max_terminated_length": 1310.0, "completions/mean_length": 579.25390625, "completions/mean_terminated_length": 581.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.12906666666666666, "grad_norm": 0.23283204436302185, "learning_rate": 2.222222222222222e-06, "loss": 0.0246, "num_tokens": 30858340.0, "reward": 1.06640625, "reward_std": 0.3377421498298645, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.9921875, "step": 121 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.13671875, "calib/ece": 0.3295238095238098, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.13095238095238096, "calib/gap": -0.007588374927824604, "calib/mean_conf": 0.8834920634920637, "calib/mu_c": 0.8802097902097904, "calib/mu_w": 0.887798165137615, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.322777777777778, "calib/std_conf": 0.08854930883830771, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4114647177419355, "calib/step_q_c_n": 992.0, "calib/step_q_gap": 0.05212340037666596, "calib/step_q_w": 0.35934131736526953, "calib/step_q_w_n": 835.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2699.0, "completions/max_terminated_length": 2699.0, "completions/mean_length": 552.42578125, "completions/mean_terminated_length": 556.7755737304688, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.13013333333333332, "grad_norm": 0.18356913328170776, "learning_rate": 2.1944444444444445e-06, "loss": -0.0261, "num_tokens": 31130913.0, "reward": 1.0546875, "reward_std": 0.20936806499958038, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.984375, "step": 122 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.73046875, "calib/ece": 0.420830039525692, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09090909090909091, "calib/gap": -0.00712216229775442, "calib/mean_conf": 0.888814229249012, "calib/mu_c": 0.8850420168067232, "calib/mu_w": 0.8921641791044777, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41964426877470384, "calib/std_conf": 0.052556924485385015, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.39687116564417185, "calib/step_q_c_n": 815.0, "calib/step_q_gap": 0.048168416503278355, "calib/step_q_w": 0.3487027491408935, "calib/step_q_w_n": 1164.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2748.0, "completions/max_terminated_length": 2748.0, "completions/mean_length": 642.26171875, "completions/mean_terminated_length": 642.26171875, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 0.1312, "grad_norm": 0.23982587456703186, "learning_rate": 2.166666666666667e-06, "loss": 0.0348, "num_tokens": 31424428.0, "reward": 0.958984375, "reward_std": 0.33340585231781006, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.98828125, "step": 123 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.78125, "calib/ece": 0.25070312500000025, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0703125, "calib/gap": 0.022361556064073196, "calib/mean_conf": 0.8764843750000002, "calib/mu_c": 0.8847826086956525, "calib/mu_w": 0.8624210526315793, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24914062500000023, "calib/std_conf": 0.09076575142012198, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3849477682811016, "calib/step_q_c_n": 1053.0, "calib/step_q_gap": -0.023632026740860346, "calib/step_q_w": 0.40857979502196196, "calib/step_q_w_n": 683.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 535.1875, "completions/mean_terminated_length": 537.2863159179688, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.13226666666666667, "grad_norm": 0.22693496942520142, "learning_rate": 2.138888888888889e-06, "loss": 0.0037, "num_tokens": 31692060.0, "reward": 1.12890625, "reward_std": 0.20502206683158875, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 1.0, "step": 124 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.69140625, "calib/ece": 0.37373015873015897, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0873015873015873, "calib/gap": -0.006484133493155242, "calib/mean_conf": 0.8935714285714287, "calib/mu_c": 0.8904580152671756, "calib/mu_w": 0.8969421487603308, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37373015873015897, "calib/std_conf": 0.043378443291612334, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3743412797992472, "calib/step_q_c_n": 797.0, "calib/step_q_gap": 0.013107655345098723, "calib/step_q_w": 0.36123362445414847, "calib/step_q_w_n": 916.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2746.0, "completions/max_terminated_length": 2746.0, "completions/mean_length": 582.0703125, "completions/mean_terminated_length": 584.3529663085938, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.13333333333333333, "grad_norm": 0.23350414633750916, "learning_rate": 2.1111111111111114e-06, "loss": 0.043, "num_tokens": 31969686.0, "reward": 1.001953125, "reward_std": 0.34277743101119995, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.98046875, "step": 125 }, { "calib/answer_extract_rate": 0.96484375, "calib/avg_num_step_conf": 8.22265625, "calib/ece": 0.30871485943775123, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.08032128514056225, "calib/gap": 0.03169217574877958, "calib/mean_conf": 0.8830120481927713, "calib/mu_c": 0.8965034965034968, "calib/mu_w": 0.8648113207547172, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.30871485943775123, "calib/std_conf": 0.09798862658073332, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.39058176100628933, "calib/step_q_c_n": 954.0, "calib/step_q_gap": 0.03129852903409125, "calib/step_q_w": 0.3592832319721981, "calib/step_q_w_n": 1151.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2981.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 650.6015625, "completions/mean_terminated_length": 650.6015625, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.1344, "grad_norm": 0.22082242369651794, "learning_rate": 2.0833333333333334e-06, "loss": 0.0527, "num_tokens": 32265512.0, "reward": 1.0390625, "reward_std": 0.28105270862579346, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.9609375, "step": 126 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.265625, "calib/ece": 0.3551984126984128, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.02467424242424232, "calib/mean_conf": 0.8790079365079367, "calib/mu_c": 0.8907575757575759, "calib/mu_w": 0.8660833333333335, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3551984126984128, "calib/std_conf": 0.10854104600182629, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3945838084378563, "calib/step_q_c_n": 877.0, "calib/step_q_gap": 0.036750644653522624, "calib/step_q_w": 0.35783316378433366, "calib/step_q_w_n": 983.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 570.44140625, "completions/mean_terminated_length": 572.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.13546666666666668, "grad_norm": 0.25337034463882446, "learning_rate": 2.0555555555555555e-06, "loss": 0.085, "num_tokens": 32539025.0, "reward": 1.005859375, "reward_std": 0.2718546390533447, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.98046875, "step": 127 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.375, "calib/ece": 0.30916996047430867, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05928853754940711, "calib/gap": 0.0035153256704979485, "calib/mean_conf": 0.8822924901185771, "calib/mu_c": 0.8837931034482761, "calib/mu_w": 0.8802777777777782, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30916996047430867, "calib/std_conf": 0.08811307763056817, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3980301274623407, "calib/step_q_c_n": 863.0, "calib/step_q_gap": 0.05445405464049413, "calib/step_q_w": 0.34357607282184655, "calib/step_q_w_n": 769.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1889.0, "completions/max_terminated_length": 1889.0, "completions/mean_length": 556.171875, "completions/mean_terminated_length": 562.766845703125, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.13653333333333334, "grad_norm": 0.23876334726810455, "learning_rate": 2.027777777777778e-06, "loss": -0.0019, "num_tokens": 32811877.0, "reward": 1.056640625, "reward_std": 0.3122556209564209, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.98046875, "step": 128 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.875, "calib/ece": 0.2761023622047246, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.006414467253176959, "calib/mean_conf": 0.8863385826771656, "calib/mu_c": 0.8888387096774196, "calib/mu_w": 0.8824242424242427, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2761023622047246, "calib/std_conf": 0.03612613956951282, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.361572265625, "calib/step_q_c_n": 1024.0, "calib/step_q_gap": 0.016531504755434745, "calib/step_q_w": 0.34504076086956526, "calib/step_q_w_n": 736.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1549.0, "completions/max_terminated_length": 1549.0, "completions/mean_length": 517.47265625, "completions/mean_terminated_length": 519.5020141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.1376, "grad_norm": 0.20662395656108856, "learning_rate": 2.0000000000000003e-06, "loss": -0.0179, "num_tokens": 33070542.0, "reward": 1.1015625, "reward_std": 0.21173818409442902, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.9921875, "step": 129 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.6484375, "calib/ece": 0.25928571428571456, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05555555555555555, "calib/gap": -0.013150892672503023, "calib/mean_conf": 0.8799206349206351, "calib/mu_c": 0.875276073619632, "calib/mu_w": 0.888426966292135, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2461904761904765, "calib/std_conf": 0.10477107343235681, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3432317636195753, "calib/step_q_c_n": 1083.0, "calib/step_q_gap": -0.023908785653445674, "calib/step_q_w": 0.367140549273021, "calib/step_q_w_n": 619.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1711.0, "completions/max_terminated_length": 1711.0, "completions/mean_length": 497.54296875, "completions/mean_terminated_length": 503.4427185058594, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.13866666666666666, "grad_norm": 0.20441967248916626, "learning_rate": 1.9722222222222224e-06, "loss": -0.0314, "num_tokens": 33327009.0, "reward": 1.12890625, "reward_std": 0.1633341908454895, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.984375, "step": 130 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.546875, "calib/ece": 0.4560629921259844, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": -0.0062163876204970325, "calib/mean_conf": 0.8781102362204726, "calib/mu_c": 0.8745370370370374, "calib/mu_w": 0.8807534246575345, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4544881889763781, "calib/std_conf": 0.07303577252585294, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3701863354037267, "calib/step_q_c_n": 644.0, "calib/step_q_gap": 0.050687304395974686, "calib/step_q_w": 0.319499031007752, "calib/step_q_w_n": 1032.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1929.0, "completions/max_terminated_length": 1929.0, "completions/mean_length": 504.05078125, "completions/mean_terminated_length": 508.0196838378906, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.13973333333333332, "grad_norm": 0.21228958666324615, "learning_rate": 1.944444444444445e-06, "loss": -0.0192, "num_tokens": 33586062.0, "reward": 0.91796875, "reward_std": 0.19925057888031006, "rewards/accuracy_reward_step": 0.421875, "rewards/format_reward_step": 0.9921875, "step": 131 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 7.31640625, "calib/ece": 0.25726562500000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.07421875, "calib/gap": 0.008959693911207989, "calib/mean_conf": 0.8939843750000003, "calib/mu_c": 0.8972392638036814, "calib/mu_w": 0.8882795698924734, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25726562500000005, "calib/std_conf": 0.03220636204012142, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4208199643493762, "calib/step_q_c_n": 1122.0, "calib/step_q_gap": 0.1147081134838635, "calib/step_q_w": 0.3061118508655127, "calib/step_q_w_n": 751.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1230.0, "completions/max_terminated_length": 1230.0, "completions/mean_length": 543.98828125, "completions/mean_terminated_length": 546.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.1408, "grad_norm": 0.2530343532562256, "learning_rate": 1.916666666666667e-06, "loss": 0.0204, "num_tokens": 33854723.0, "reward": 1.13671875, "reward_std": 0.26170387864112854, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 1.0, "step": 132 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.79296875, "calib/ece": 0.38494071146245096, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.03768504626156555, "calib/mean_conf": 0.8750592885375496, "calib/mu_c": 0.8942741935483874, "calib/mu_w": 0.8565891472868219, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38494071146245096, "calib/std_conf": 0.113960746842225, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33913721413721415, "calib/step_q_c_n": 962.0, "calib/step_q_gap": 0.015032664282422281, "calib/step_q_w": 0.32410454985479187, "calib/step_q_w_n": 1033.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2496.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 615.33203125, "completions/mean_terminated_length": 620.1771850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.14186666666666667, "grad_norm": 0.22492097318172455, "learning_rate": 1.888888888888889e-06, "loss": -0.0081, "num_tokens": 34142400.0, "reward": 0.9765625, "reward_std": 0.32500171661376953, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.984375, "step": 133 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.8671875, "calib/ece": 0.3523046875000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01953125, "calib/gap": 0.00012329019198920044, "calib/mean_conf": 0.8874609375000002, "calib/mu_c": 0.8875182481751827, "calib/mu_w": 0.8873949579831935, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3523046875000002, "calib/std_conf": 0.04205881342383654, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3872364039955605, "calib/step_q_c_n": 901.0, "calib/step_q_gap": 0.08064363853465034, "calib/step_q_w": 0.30659276546091013, "calib/step_q_w_n": 857.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1417.0, "completions/max_terminated_length": 1417.0, "completions/mean_length": 590.69140625, "completions/mean_terminated_length": 593.0078735351562, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.14293333333333333, "grad_norm": 0.2229323834180832, "learning_rate": 1.8611111111111113e-06, "loss": 0.024, "num_tokens": 34426377.0, "reward": 1.03125, "reward_std": 0.23475275933742523, "rewards/accuracy_reward_step": 0.53515625, "rewards/format_reward_step": 0.9921875, "step": 134 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.8828125, "calib/ece": 0.30433070866141765, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.008465068842427259, "calib/mean_conf": 0.8870078740157482, "calib/mu_c": 0.8905405405405408, "calib/mu_w": 0.8820754716981135, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30433070866141765, "calib/std_conf": 0.04148455666795637, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3566565040650406, "calib/step_q_c_n": 984.0, "calib/step_q_gap": 0.03763336781825399, "calib/step_q_w": 0.31902313624678663, "calib/step_q_w_n": 778.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2427.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 584.2734375, "completions/mean_terminated_length": 586.5647583007812, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.144, "grad_norm": 0.22317829728126526, "learning_rate": 1.8333333333333333e-06, "loss": -0.0075, "num_tokens": 34705639.0, "reward": 1.068359375, "reward_std": 0.2599042057991028, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.98046875, "step": 135 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.6484375, "calib/ece": 0.39130434782608736, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.028525881470367942, "calib/mean_conf": 0.8814229249011859, "calib/mu_c": 0.8959677419354842, "calib/mu_w": 0.8674418604651163, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39130434782608736, "calib/std_conf": 0.08511066376839702, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.4052961275626423, "calib/step_q_c_n": 878.0, "calib/step_q_gap": 0.12261094237745712, "calib/step_q_w": 0.2826851851851852, "calib/step_q_w_n": 1080.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2334.0, "completions/max_terminated_length": 2334.0, "completions/mean_length": 575.875, "completions/mean_terminated_length": 580.409423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.14506666666666668, "grad_norm": 0.20847293734550476, "learning_rate": 1.8055555555555557e-06, "loss": -0.0167, "num_tokens": 34985359.0, "reward": 0.9765625, "reward_std": 0.21726368367671967, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.984375, "step": 136 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.76953125, "calib/ece": 0.30309803921568634, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": 0.00392018186410692, "calib/mean_conf": 0.8834901960784315, "calib/mu_c": 0.8851351351351353, "calib/mu_w": 0.8812149532710284, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30309803921568634, "calib/std_conf": 0.03878516623103988, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3372983870967742, "calib/step_q_c_n": 992.0, "calib/step_q_gap": -0.0003534347655739789, "calib/step_q_w": 0.33765182186234816, "calib/step_q_w_n": 741.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1237.0, "completions/max_terminated_length": 1237.0, "completions/mean_length": 517.1015625, "completions/mean_terminated_length": 519.1294555664062, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.14613333333333334, "grad_norm": 0.20667599141597748, "learning_rate": 1.777777777777778e-06, "loss": -0.0081, "num_tokens": 35248529.0, "reward": 1.076171875, "reward_std": 0.15070918202400208, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.99609375, "step": 137 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.8828125, "calib/ece": 0.3109842519685041, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05905511811023622, "calib/gap": 0.007436582109479284, "calib/mean_conf": 0.8897244094488189, "calib/mu_c": 0.8928571428571432, "calib/mu_w": 0.885420560747664, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3109842519685041, "calib/std_conf": 0.04171900323534182, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3578183716075157, "calib/step_q_c_n": 958.0, "calib/step_q_gap": 0.017830809418460936, "calib/step_q_w": 0.33998756218905474, "calib/step_q_w_n": 804.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1838.0, "completions/max_terminated_length": 1838.0, "completions/mean_length": 559.6796875, "completions/mean_terminated_length": 561.8745727539062, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.1472, "grad_norm": 0.28204968571662903, "learning_rate": 1.75e-06, "loss": 0.0007, "num_tokens": 35519951.0, "reward": 1.07421875, "reward_std": 0.31372708082199097, "rewards/accuracy_reward_step": 0.578125, "rewards/format_reward_step": 0.9921875, "step": 138 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.140625, "calib/ece": 0.2312992125984255, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.004443521594684197, "calib/mean_conf": 0.8895669291338584, "calib/mu_c": 0.8910714285714288, "calib/mu_w": 0.8866279069767447, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22972440944881922, "calib/std_conf": 0.04137652926520479, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.39311043566362713, "calib/step_q_c_n": 987.0, "calib/step_q_gap": 0.03943522198841343, "calib/step_q_w": 0.3536752136752137, "calib/step_q_w_n": 585.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1291.0, "completions/max_terminated_length": 1291.0, "completions/mean_length": 483.73828125, "completions/mean_terminated_length": 487.5472412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.14826666666666666, "grad_norm": 0.2560315728187561, "learning_rate": 1.7222222222222224e-06, "loss": 0.0195, "num_tokens": 35770692.0, "reward": 1.15234375, "reward_std": 0.22398819029331207, "rewards/accuracy_reward_step": 0.65625, "rewards/format_reward_step": 0.9921875, "step": 139 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.64453125, "calib/ece": 0.2891338582677168, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.003798615155632734, "calib/mean_conf": 0.8914960629921262, "calib/mu_c": 0.8930065359477124, "calib/mu_w": 0.8892079207920797, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2891338582677168, "calib/std_conf": 0.0386296516228139, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4036391752577319, "calib/step_q_c_n": 970.0, "calib/step_q_gap": 0.06558171971737625, "calib/step_q_w": 0.33805745554035566, "calib/step_q_w_n": 731.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2420.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 554.55078125, "completions/mean_terminated_length": 558.9172973632812, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.14933333333333335, "grad_norm": 0.241206556558609, "learning_rate": 1.6944444444444446e-06, "loss": -0.0029, "num_tokens": 36041481.0, "reward": 1.09375, "reward_std": 0.19005943834781647, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.9921875, "step": 140 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.8359375, "calib/ece": 0.23257812500000025, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.02734375, "calib/gap": 0.013732262382864957, "calib/mean_conf": 0.8810156250000002, "calib/mu_c": 0.8858433734939763, "calib/mu_w": 0.8721111111111114, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23257812500000025, "calib/std_conf": 0.047324343691797506, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3931985294117647, "calib/step_q_c_n": 1088.0, "calib/step_q_gap": 0.08118946596765592, "calib/step_q_w": 0.3120090634441088, "calib/step_q_w_n": 662.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1694.0, "completions/max_terminated_length": 1694.0, "completions/mean_length": 578.109375, "completions/mean_terminated_length": 580.3765258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.1504, "grad_norm": 0.19748741388320923, "learning_rate": 1.6666666666666667e-06, "loss": -0.0073, "num_tokens": 36320381.0, "reward": 1.14453125, "reward_std": 0.16845659911632538, "rewards/accuracy_reward_step": 0.6484375, "rewards/format_reward_step": 0.9921875, "step": 141 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.015625, "calib/ece": 0.3238492063492067, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": 0.002469365496888587, "calib/mean_conf": 0.891309523809524, "calib/mu_c": 0.8923776223776227, "calib/mu_w": 0.8899082568807342, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3238492063492067, "calib/std_conf": 0.03944394541538628, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.37628865979381443, "calib/step_q_c_n": 970.0, "calib/step_q_gap": 0.032826190060158245, "calib/step_q_w": 0.3434624697336562, "calib/step_q_w_n": 826.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 567.62890625, "completions/mean_terminated_length": 574.3596801757812, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.15146666666666667, "grad_norm": 0.2087187021970749, "learning_rate": 1.638888888888889e-06, "loss": -0.0134, "num_tokens": 36594662.0, "reward": 1.05078125, "reward_std": 0.20927922427654266, "rewards/accuracy_reward_step": 0.55859375, "rewards/format_reward_step": 0.984375, "step": 142 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.66015625, "calib/ece": 0.3233864541832673, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.0018665377176013687, "calib/mean_conf": 0.8851394422310759, "calib/mu_c": 0.885957446808511, "calib/mu_w": 0.8840909090909096, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3233864541832673, "calib/std_conf": 0.036394809375647104, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4246967779056387, "calib/step_q_c_n": 869.0, "calib/step_q_gap": 0.15248066068952149, "calib/step_q_w": 0.2722161172161172, "calib/step_q_w_n": 1092.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2847.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 594.078125, "completions/mean_terminated_length": 596.4078979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.15253333333333333, "grad_norm": 0.22333618998527527, "learning_rate": 1.6111111111111113e-06, "loss": 0.0154, "num_tokens": 36877890.0, "reward": 1.041015625, "reward_std": 0.20484405755996704, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.98046875, "step": 143 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.05859375, "calib/ece": 0.25389763779527597, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": -7.246376811631539e-05, "calib/mean_conf": 0.8916929133858269, "calib/mu_c": 0.8916666666666667, "calib/mu_w": 0.891739130434783, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25389763779527597, "calib/std_conf": 0.030129121552543325, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3650319051959891, "calib/step_q_c_n": 1097.0, "calib/step_q_gap": 0.024609369984721463, "calib/step_q_w": 0.34042253521126764, "calib/step_q_w_n": 710.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 583.72265625, "completions/mean_terminated_length": 583.72265625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.1536, "grad_norm": 0.24799968302249908, "learning_rate": 1.5833333333333333e-06, "loss": 0.0299, "num_tokens": 37155259.0, "reward": 1.12890625, "reward_std": 0.27356091141700745, "rewards/accuracy_reward_step": 0.6328125, "rewards/format_reward_step": 0.9921875, "step": 144 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 7.265625, "calib/ece": 0.244685039370079, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.011728395061728292, "calib/mean_conf": 0.8824803149606301, "calib/mu_c": 0.8867283950617286, "calib/mu_w": 0.8750000000000003, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.244685039370079, "calib/std_conf": 0.06549440685306386, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.417799671592775, "calib/step_q_c_n": 1218.0, "calib/step_q_gap": 0.03429499869557873, "calib/step_q_w": 0.38350467289719625, "calib/step_q_w_n": 642.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1682.0, "completions/max_terminated_length": 1682.0, "completions/mean_length": 536.359375, "completions/mean_terminated_length": 538.4627685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.15466666666666667, "grad_norm": 0.24093927443027496, "learning_rate": 1.5555555555555558e-06, "loss": 0.0184, "num_tokens": 37419079.0, "reward": 1.1328125, "reward_std": 0.2124004065990448, "rewards/accuracy_reward_step": 0.63671875, "rewards/format_reward_step": 0.9921875, "step": 145 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 8.109375, "calib/ece": 0.4515294117647062, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": 0.016344280719280624, "calib/mean_conf": 0.8907450980392159, "calib/mu_c": 0.8999107142857145, "calib/mu_w": 0.8835664335664338, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4515294117647062, "calib/std_conf": 0.06471531719687658, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3966867469879518, "calib/step_q_c_n": 830.0, "calib/step_q_gap": 0.026494130615560163, "calib/step_q_w": 0.37019261637239165, "calib/step_q_w_n": 1246.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1985.0, "completions/max_terminated_length": 1985.0, "completions/mean_length": 618.1015625, "completions/mean_terminated_length": 618.1015625, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.15573333333333333, "grad_norm": 0.25144022703170776, "learning_rate": 1.527777777777778e-06, "loss": 0.028, "num_tokens": 37708337.0, "reward": 0.931640625, "reward_std": 0.27421265840530396, "rewards/accuracy_reward_step": 0.4375, "rewards/format_reward_step": 0.98828125, "step": 146 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.38838582677165373, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": -0.010448085396884799, "calib/mean_conf": 0.8844488188976379, "calib/mu_c": 0.8793893129770993, "calib/mu_w": 0.8898373983739841, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3785433070866144, "calib/std_conf": 0.0885373899984569, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.44844789356984477, "calib/step_q_c_n": 902.0, "calib/step_q_gap": 0.0846992362121648, "calib/step_q_w": 0.36374865735767997, "calib/step_q_w_n": 931.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2167.0, "completions/max_terminated_length": 2167.0, "completions/mean_length": 578.62890625, "completions/mean_terminated_length": 580.8980712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.1568, "grad_norm": 0.20188067853450775, "learning_rate": 1.5e-06, "loss": 0.006, "num_tokens": 37983954.0, "reward": 1.005859375, "reward_std": 0.1799573004245758, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.98828125, "step": 147 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.87890625, "calib/ece": 0.27743083003952596, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.008956550362080429, "calib/mean_conf": 0.8900790513833993, "calib/mu_c": 0.8935483870967745, "calib/mu_w": 0.8845918367346941, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27743083003952596, "calib/std_conf": 0.060085525966913855, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.43912957467853614, "calib/step_q_c_n": 1011.0, "calib/step_q_gap": 0.013929574678536172, "calib/step_q_w": 0.42519999999999997, "calib/step_q_w_n": 750.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2380.0, "completions/max_terminated_length": 2380.0, "completions/mean_length": 550.26953125, "completions/mean_terminated_length": 554.6023559570312, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.15786666666666666, "grad_norm": 0.24239996075630188, "learning_rate": 1.4722222222222225e-06, "loss": -0.0064, "num_tokens": 38253743.0, "reward": 1.09765625, "reward_std": 0.23398509621620178, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.984375, "step": 148 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.8359375, "calib/ece": 0.3885375494071147, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.007246874999999875, "calib/mean_conf": 0.8944664031620555, "calib/mu_c": 0.8980468750000002, "calib/mu_w": 0.8908000000000004, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3885375494071147, "calib/std_conf": 0.0375774806642311, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4303410475030451, "calib/step_q_c_n": 821.0, "calib/step_q_gap": 0.07758999265072442, "calib/step_q_w": 0.35275105485232067, "calib/step_q_w_n": 1185.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 627.0859375, "completions/mean_terminated_length": 627.0859375, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.15893333333333334, "grad_norm": 0.23352481424808502, "learning_rate": 1.4444444444444445e-06, "loss": 0.0634, "num_tokens": 38542541.0, "reward": 0.990234375, "reward_std": 0.28249531984329224, "rewards/accuracy_reward_step": 0.5, "rewards/format_reward_step": 0.98046875, "step": 149 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 7.06640625, "calib/ece": 0.31148594377510064, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": 0.0175535714285715, "calib/mean_conf": 0.8873895582329319, "calib/mu_c": 0.8947916666666669, "calib/mu_w": 0.8772380952380954, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31028112449799217, "calib/std_conf": 0.06665873083845503, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4666407982261641, "calib/step_q_c_n": 902.0, "calib/step_q_gap": 0.06709283791745407, "calib/step_q_w": 0.39954796030871004, "calib/step_q_w_n": 907.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2383.0, "completions/max_terminated_length": 2383.0, "completions/mean_length": 506.859375, "completions/mean_terminated_length": 514.90478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.16, "grad_norm": 0.2571173906326294, "learning_rate": 1.4166666666666667e-06, "loss": -0.0348, "num_tokens": 38801065.0, "reward": 1.048828125, "reward_std": 0.2649000883102417, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.97265625, "step": 150 }, { "calib/answer_extract_rate": 0.97265625, "calib/avg_num_step_conf": 8.09765625, "calib/ece": 0.3912350597609565, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00708571428571414, "calib/mean_conf": 0.8892430278884463, "calib/mu_c": 0.8928000000000004, "calib/mu_w": 0.8857142857142862, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3912350597609565, "calib/std_conf": 0.04070867128174436, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4377538829151732, "calib/step_q_c_n": 837.0, "calib/step_q_gap": 0.09190436835206639, "calib/step_q_w": 0.3458495145631068, "calib/step_q_w_n": 1236.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 612.34375, "completions/mean_terminated_length": 617.1653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.16106666666666666, "grad_norm": 0.24429307878017426, "learning_rate": 1.3888888888888892e-06, "loss": 0.0082, "num_tokens": 39088657.0, "reward": 0.974609375, "reward_std": 0.2938493490219116, "rewards/accuracy_reward_step": 0.48828125, "rewards/format_reward_step": 0.97265625, "step": 151 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 7.5625, "calib/ece": 0.36968503937007885, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": -0.006805684850742066, "calib/mean_conf": 0.8854330708661419, "calib/mu_c": 0.8821374045801527, "calib/mu_w": 0.8889430894308947, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36968503937007885, "calib/std_conf": 0.03714741659740444, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3964218258132214, "calib/step_q_c_n": 953.0, "calib/step_q_gap": 0.0075408492109833025, "calib/step_q_w": 0.3888809766022381, "calib/step_q_w_n": 983.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1946.0, "completions/max_terminated_length": 1946.0, "completions/mean_length": 561.98828125, "completions/mean_terminated_length": 564.1921997070312, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.16213333333333332, "grad_norm": 0.24398364126682281, "learning_rate": 1.3611111111111112e-06, "loss": 0.0061, "num_tokens": 39361726.0, "reward": 1.0, "reward_std": 0.28061729669570923, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.9765625, "step": 152 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.1484375, "calib/ece": 0.32159362549800813, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.0038603322949116725, "calib/mean_conf": 0.8952988047808766, "calib/mu_c": 0.8969444444444447, "calib/mu_w": 0.893084112149533, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32159362549800813, "calib/std_conf": 0.025565523164352975, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.459238683127572, "calib/step_q_c_n": 972.0, "calib/step_q_gap": 0.07415709804598691, "calib/step_q_w": 0.3850815850815851, "calib/step_q_w_n": 858.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2561.0, "completions/max_terminated_length": 2561.0, "completions/mean_length": 601.21484375, "completions/mean_terminated_length": 605.9487915039062, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.1632, "grad_norm": 0.24795496463775635, "learning_rate": 1.3333333333333334e-06, "loss": 0.0033, "num_tokens": 39646765.0, "reward": 1.046875, "reward_std": 0.3021770715713501, "rewards/accuracy_reward_step": 0.5625, "rewards/format_reward_step": 0.96875, "step": 153 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.90234375, "calib/ece": 0.3743650793650797, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.009522427607090878, "calib/mean_conf": 0.8942063492063493, "calib/mu_c": 0.8987786259541986, "calib/mu_w": 0.8892561983471077, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3743650793650797, "calib/std_conf": 0.02536568030756037, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4836067415730337, "calib/step_q_c_n": 890.0, "calib/step_q_gap": 0.0682133550279938, "calib/step_q_w": 0.4153933865450399, "calib/step_q_w_n": 877.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2572.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 545.1328125, "completions/mean_terminated_length": 547.2706298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.16426666666666667, "grad_norm": 0.2542245388031006, "learning_rate": 1.3055555555555556e-06, "loss": -0.001, "num_tokens": 39914567.0, "reward": 1.0078125, "reward_std": 0.2664416432380676, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.984375, "step": 154 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.6484375, "calib/ece": 0.3974609375000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.004560439560439833, "calib/mean_conf": 0.8833984375000002, "calib/mu_c": 0.8857142857142861, "calib/mu_w": 0.8811538461538463, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3943359375000002, "calib/std_conf": 0.09337306221581573, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.5181536555142503, "calib/step_q_c_n": 807.0, "calib/step_q_gap": 0.09922628121257432, "calib/step_q_w": 0.418927374301676, "calib/step_q_w_n": 895.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 512.37109375, "completions/mean_terminated_length": 514.3804321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.16533333333333333, "grad_norm": 0.24787573516368866, "learning_rate": 1.2777777777777779e-06, "loss": -0.0004, "num_tokens": 40176758.0, "reward": 0.990234375, "reward_std": 0.20889762043952942, "rewards/accuracy_reward_step": 0.4921875, "rewards/format_reward_step": 0.99609375, "step": 155 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.5546875, "calib/ece": 0.36360000000000037, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": 0.024446725254987145, "calib/mean_conf": 0.8876000000000002, "calib/mu_c": 0.8992366412213741, "calib/mu_w": 0.874789915966387, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36360000000000037, "calib/std_conf": 0.07018717831627085, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4461740041928721, "calib/step_q_c_n": 954.0, "calib/step_q_gap": 0.035439310315321104, "calib/step_q_w": 0.410734693877551, "calib/step_q_w_n": 980.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 581.359375, "completions/mean_terminated_length": 581.359375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.1664, "grad_norm": 0.23543541133403778, "learning_rate": 1.25e-06, "loss": 0.0225, "num_tokens": 40454154.0, "reward": 1.00390625, "reward_std": 0.28826624155044556, "rewards/accuracy_reward_step": 0.515625, "rewards/format_reward_step": 0.9765625, "step": 156 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.98046875, "calib/ece": 0.26778656126482253, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.01715100849256923, "calib/mean_conf": 0.8804347826086958, "calib/mu_c": 0.8869426751592361, "calib/mu_w": 0.8697916666666669, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2638339920948621, "calib/std_conf": 0.08412892829895907, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.4687330316742082, "calib/step_q_c_n": 1105.0, "calib/step_q_gap": 0.04959813431350435, "calib/step_q_w": 0.41913489736070386, "calib/step_q_w_n": 682.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2749.0, "completions/max_terminated_length": 2749.0, "completions/mean_length": 544.91015625, "completions/mean_terminated_length": 544.91015625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.16746666666666668, "grad_norm": 0.24358108639717102, "learning_rate": 1.2222222222222223e-06, "loss": 0.0189, "num_tokens": 40721187.0, "reward": 1.107421875, "reward_std": 0.27992281317710876, "rewards/accuracy_reward_step": 0.6171875, "rewards/format_reward_step": 0.98046875, "step": 157 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 7.54296875, "calib/ece": 0.30439215686274546, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.010980392156863195, "calib/mean_conf": 0.8855686274509805, "calib/mu_c": 0.8811764705882354, "calib/mu_w": 0.8921568627450986, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2949803921568631, "calib/std_conf": 0.084535467646858, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4779475982532751, "calib/step_q_c_n": 1145.0, "calib/step_q_gap": 0.017286020645132627, "calib/step_q_w": 0.46066157760814247, "calib/step_q_w_n": 786.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1700.0, "completions/max_terminated_length": 1700.0, "completions/mean_length": 537.83984375, "completions/mean_terminated_length": 539.9490356445312, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.16853333333333334, "grad_norm": 0.2743615210056305, "learning_rate": 1.1944444444444446e-06, "loss": 0.0309, "num_tokens": 40987922.0, "reward": 1.095703125, "reward_std": 0.279867947101593, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.99609375, "step": 158 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.29296875, "calib/ece": 0.3318972332015812, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.04743083003952569, "calib/gap": 0.009967643700038109, "calib/mean_conf": 0.8931620553359686, "calib/mu_c": 0.8975352112676059, "calib/mu_w": 0.8875675675675678, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3318972332015812, "calib/std_conf": 0.02956003843798982, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4912575574365175, "calib/step_q_c_n": 827.0, "calib/step_q_gap": 0.050925924783456256, "calib/step_q_w": 0.44033163265306124, "calib/step_q_w_n": 784.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2662.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 519.953125, "completions/mean_terminated_length": 521.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.1696, "grad_norm": 0.2283385694026947, "learning_rate": 1.1666666666666668e-06, "loss": 0.0215, "num_tokens": 41249622.0, "reward": 1.048828125, "reward_std": 0.19320310652256012, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.98828125, "step": 159 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.19921875, "calib/ece": 0.33150197628458533, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.019762845849802372, "calib/gap": 0.00684397163120587, "calib/mean_conf": 0.8888142292490121, "calib/mu_c": 0.891843971631206, "calib/mu_w": 0.8850000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33150197628458533, "calib/std_conf": 0.06296508439496006, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5101843817787419, "calib/step_q_c_n": 922.0, "calib/step_q_gap": 0.07480978894486567, "calib/step_q_w": 0.43537459283387625, "calib/step_q_w_n": 921.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 528.37109375, "completions/mean_terminated_length": 530.4431762695312, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.17066666666666666, "grad_norm": 0.2358379364013672, "learning_rate": 1.138888888888889e-06, "loss": 0.0009, "num_tokens": 41513533.0, "reward": 1.044921875, "reward_std": 0.23529572784900665, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.98828125, "step": 160 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.609375, "calib/ece": 0.1893359375000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0390625, "calib/gap": 0.01641812865497061, "calib/mean_conf": 0.8895703125000001, "calib/mu_c": 0.8944444444444447, "calib/mu_w": 0.8780263157894741, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1878906250000003, "calib/std_conf": 0.075131466401584, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.47634879725085916, "calib/step_q_c_n": 1164.0, "calib/step_q_gap": 0.023521145735707627, "calib/step_q_w": 0.45282765151515153, "calib/step_q_w_n": 528.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1357.0, "completions/max_terminated_length": 1357.0, "completions/mean_length": 502.0546875, "completions/mean_terminated_length": 504.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.17173333333333332, "grad_norm": 0.2519267499446869, "learning_rate": 1.111111111111111e-06, "loss": 0.0282, "num_tokens": 41769787.0, "reward": 1.203125, "reward_std": 0.19977852702140808, "rewards/accuracy_reward_step": 0.703125, "rewards/format_reward_step": 1.0, "step": 161 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.87890625, "calib/ece": 0.2341568627450984, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.006736617586349025, "calib/mean_conf": 0.8906274509803923, "calib/mu_c": 0.8928994082840239, "calib/mu_w": 0.8861627906976749, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2310196078431376, "calib/std_conf": 0.05056929269913447, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5035537190082645, "calib/step_q_c_n": 1089.0, "calib/step_q_gap": 0.10638109996064543, "calib/step_q_w": 0.3971726190476191, "calib/step_q_w_n": 672.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 513.66796875, "completions/mean_terminated_length": 513.66796875, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.1728, "grad_norm": 0.2774161100387573, "learning_rate": 1.0833333333333335e-06, "loss": 0.0425, "num_tokens": 42029238.0, "reward": 1.158203125, "reward_std": 0.2824953496456146, "rewards/accuracy_reward_step": 0.66015625, "rewards/format_reward_step": 0.99609375, "step": 162 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.61328125, "calib/ece": 0.3974501992031875, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": 0.0136626873253749, "calib/mean_conf": 0.8914741035856576, "calib/mu_c": 0.8983870967741939, "calib/mu_w": 0.884724409448819, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3974501992031875, "calib/std_conf": 0.03204544716426028, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5095477386934674, "calib/step_q_c_n": 796.0, "calib/step_q_gap": 0.07791720963882726, "calib/step_q_w": 0.4316305290546401, "calib/step_q_w_n": 1153.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2408.0, "completions/max_terminated_length": 2408.0, "completions/mean_length": 579.25390625, "completions/mean_terminated_length": 581.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.17386666666666667, "grad_norm": 0.26556578278541565, "learning_rate": 1.0555555555555557e-06, "loss": -0.0127, "num_tokens": 42306167.0, "reward": 0.978515625, "reward_std": 0.2926446497440338, "rewards/accuracy_reward_step": 0.4921875, "rewards/format_reward_step": 0.97265625, "step": 163 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.546875, "calib/ece": 0.3106324110671939, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": -0.0006621621621620832, "calib/mean_conf": 0.895612648221344, "calib/mu_c": 0.8953378378378382, "calib/mu_w": 0.8960000000000002, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3106324110671939, "calib/std_conf": 0.023626711580360647, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4920617529880478, "calib/step_q_c_n": 1004.0, "calib/step_q_gap": 0.045488477125978755, "calib/step_q_w": 0.446573275862069, "calib/step_q_w_n": 928.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2647.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 626.21875, "completions/mean_terminated_length": 626.21875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.17493333333333333, "grad_norm": 0.24319250881671906, "learning_rate": 1.0277777777777777e-06, "loss": 0.0384, "num_tokens": 42596423.0, "reward": 1.076171875, "reward_std": 0.27499398589134216, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.98828125, "step": 164 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 7.109375, "calib/ece": 0.4509375000000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0234375, "calib/gap": 0.002016308376575293, "calib/mean_conf": 0.8962500000000002, "calib/mu_c": 0.8973684210526319, "calib/mu_w": 0.8953521126760566, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4509375000000002, "calib/std_conf": 0.022447856245084954, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.5375154511742892, "calib/step_q_c_n": 809.0, "calib/step_q_gap": 0.05436015938398259, "calib/step_q_w": 0.48315529179030664, "calib/step_q_w_n": 1011.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 559.75, "completions/mean_terminated_length": 561.9451293945312, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.176, "grad_norm": 0.24757790565490723, "learning_rate": 1.0000000000000002e-06, "loss": -0.0085, "num_tokens": 42869103.0, "reward": 0.939453125, "reward_std": 0.1965903639793396, "rewards/accuracy_reward_step": 0.4453125, "rewards/format_reward_step": 0.98828125, "step": 165 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 7.40234375, "calib/ece": 0.2964453125000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0625, "calib/gap": 0.007933537051183825, "calib/mean_conf": 0.8980078125000003, "calib/mu_c": 0.9011688311688314, "calib/mu_w": 0.8932352941176476, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2964453125000003, "calib/std_conf": 0.02367454992106172, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5356290773532153, "calib/step_q_c_n": 1073.0, "calib/step_q_gap": 0.04134683891039298, "calib/step_q_w": 0.4942822384428223, "calib/step_q_w_n": 822.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1865.0, "completions/max_terminated_length": 1865.0, "completions/mean_length": 591.34765625, "completions/mean_terminated_length": 593.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.17706666666666668, "grad_norm": 0.23903323709964752, "learning_rate": 9.722222222222224e-07, "loss": -0.0011, "num_tokens": 43150480.0, "reward": 1.1015625, "reward_std": 0.17950758337974548, "rewards/accuracy_reward_step": 0.6015625, "rewards/format_reward_step": 1.0, "step": 166 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 7.3203125, "calib/ece": 0.21704724409448858, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": -0.0018104617141223578, "calib/mean_conf": 0.8981496062992128, "calib/mu_c": 0.8975722543352604, "calib/mu_w": 0.8993827160493828, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21704724409448858, "calib/std_conf": 0.017816907276078237, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.558006589785832, "calib/step_q_c_n": 1214.0, "calib/step_q_gap": 0.08468840796765009, "calib/step_q_w": 0.47331818181818186, "calib/step_q_w_n": 660.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 545.55859375, "completions/mean_terminated_length": 545.55859375, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.17813333333333334, "grad_norm": 0.24380220472812653, "learning_rate": 9.444444444444445e-07, "loss": 0.0622, "num_tokens": 43419559.0, "reward": 1.169921875, "reward_std": 0.24672676622867584, "rewards/accuracy_reward_step": 0.67578125, "rewards/format_reward_step": 0.98828125, "step": 167 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 6.82421875, "calib/ece": 0.33366533864541864, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.006246937459703283, "calib/mean_conf": 0.8954183266932273, "calib/mu_c": 0.8981560283687946, "calib/mu_w": 0.8919090909090913, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33366533864541864, "calib/std_conf": 0.029942246912816883, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.5973455377574372, "calib/step_q_c_n": 874.0, "calib/step_q_gap": 0.10586787452719659, "calib/step_q_w": 0.49147766323024056, "calib/step_q_w_n": 873.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2670.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 599.2109375, "completions/mean_terminated_length": 606.3162231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.1792, "grad_norm": 0.2356565147638321, "learning_rate": 9.166666666666666e-07, "loss": 0.0053, "num_tokens": 43701437.0, "reward": 1.041015625, "reward_std": 0.2755982577800751, "rewards/accuracy_reward_step": 0.55078125, "rewards/format_reward_step": 0.98046875, "step": 168 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.14453125, "calib/ece": 0.36667984189723346, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.039525691699604744, "calib/gap": 0.0017860278439728594, "calib/mean_conf": 0.896324110671937, "calib/mu_c": 0.8971641791044777, "calib/mu_w": 0.8953781512605048, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36667984189723346, "calib/std_conf": 0.025043947374533897, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5402594594594595, "calib/step_q_c_n": 925.0, "calib/step_q_gap": 0.0609563621143267, "calib/step_q_w": 0.4793030973451328, "calib/step_q_w_n": 904.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2455.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 580.05859375, "completions/mean_terminated_length": 582.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.18026666666666666, "grad_norm": 0.22578802704811096, "learning_rate": 8.88888888888889e-07, "loss": 0.0145, "num_tokens": 43977924.0, "reward": 1.015625, "reward_std": 0.23758748173713684, "rewards/accuracy_reward_step": 0.5234375, "rewards/format_reward_step": 0.984375, "step": 169 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.70703125, "calib/ece": 0.3147826086956524, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.009025798998844703, "calib/mean_conf": 0.895810276679842, "calib/mu_c": 0.8995918367346941, "calib/mu_w": 0.8905660377358494, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3147826086956524, "calib/std_conf": 0.058342009667291285, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5898637602179836, "calib/step_q_c_n": 1101.0, "calib/step_q_gap": 0.09208853086018542, "calib/step_q_w": 0.49777522935779817, "calib/step_q_w_n": 872.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1655.0, "completions/max_terminated_length": 1655.0, "completions/mean_length": 565.59375, "completions/mean_terminated_length": 570.0472412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.18133333333333335, "grad_norm": 0.2454632967710495, "learning_rate": 8.611111111111112e-07, "loss": -0.0245, "num_tokens": 44250676.0, "reward": 1.068359375, "reward_std": 0.2688901424407959, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.98828125, "step": 170 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.84765625, "calib/ece": 0.39325490196078455, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.03912398424033525, "calib/mean_conf": 0.8720784313725491, "calib/mu_c": 0.8921774193548392, "calib/mu_w": 0.8530534351145039, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3895294117647061, "calib/std_conf": 0.1374159055692672, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.579527950310559, "calib/step_q_c_n": 805.0, "calib/step_q_gap": 0.028399258327436616, "calib/step_q_w": 0.5511286919831224, "calib/step_q_w_n": 948.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 529.40625, "completions/mean_terminated_length": 529.40625, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.1824, "grad_norm": 0.265653133392334, "learning_rate": 8.333333333333333e-07, "loss": 0.0417, "num_tokens": 44516908.0, "reward": 0.982421875, "reward_std": 0.2800021767616272, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.99609375, "step": 171 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.4765625, "calib/ece": 0.255793650793651, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": 0.014725274725274629, "calib/mean_conf": 0.8946825396825399, "calib/mu_c": 0.9000000000000002, "calib/mu_w": 0.8852747252747256, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.255793650793651, "calib/std_conf": 0.058779740975148646, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.5805786802030458, "calib/step_q_c_n": 985.0, "calib/step_q_gap": 0.09230230576025233, "calib/step_q_w": 0.4882763744427935, "calib/step_q_w_n": 673.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2941.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 528.34375, "completions/mean_terminated_length": 530.4157104492188, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.18346666666666667, "grad_norm": 0.25087136030197144, "learning_rate": 8.055555555555557e-07, "loss": 0.0088, "num_tokens": 44779324.0, "reward": 1.119140625, "reward_std": 0.24476386606693268, "rewards/accuracy_reward_step": 0.62890625, "rewards/format_reward_step": 0.98046875, "step": 172 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.26261904761904786, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": 0.0009184782608693265, "calib/mean_conf": 0.8975396825396826, "calib/mu_c": 0.8978750000000002, "calib/mu_w": 0.8969565217391309, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.26261904761904786, "calib/std_conf": 0.020651550075820927, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.5774318381706245, "calib/step_q_c_n": 1137.0, "calib/step_q_gap": 0.1100020628897257, "calib/step_q_w": 0.4674297752808988, "calib/step_q_w_n": 712.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2034.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 552.3046875, "completions/mean_terminated_length": 556.653564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.18453333333333333, "grad_norm": 0.2781237065792084, "learning_rate": 7.777777777777779e-07, "loss": 0.0031, "num_tokens": 45047682.0, "reward": 1.11328125, "reward_std": 0.28275614976882935, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.9765625, "step": 173 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.40470119521912384, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": -0.003434721869443824, "calib/mean_conf": 0.8963346613545817, "calib/mu_c": 0.8945967741935488, "calib/mu_w": 0.8980314960629926, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4035059760956179, "calib/std_conf": 0.025594749247233217, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.6048370136698213, "calib/step_q_c_n": 951.0, "calib/step_q_gap": 0.031507392288975034, "calib/step_q_w": 0.5733296213808463, "calib/step_q_w_n": 898.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2634.0, "completions/max_terminated_length": 2634.0, "completions/mean_length": 640.65234375, "completions/mean_terminated_length": 640.65234375, "completions/min_length": 194.0, "completions/min_terminated_length": 194.0, "epoch": 0.1856, "grad_norm": 0.22465208172798157, "learning_rate": 7.5e-07, "loss": -0.0072, "num_tokens": 45339729.0, "reward": 0.98046875, "reward_std": 0.2972414195537567, "rewards/accuracy_reward_step": 0.49609375, "rewards/format_reward_step": 0.96875, "step": 174 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.7421875, "calib/ece": 0.42886274509803946, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.0027100840336139465, "calib/mean_conf": 0.8955294117647059, "calib/mu_c": 0.8969747899159669, "calib/mu_w": 0.894264705882353, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.42886274509803946, "calib/std_conf": 0.02368945570119087, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.5905570652173913, "calib/step_q_c_n": 736.0, "calib/step_q_gap": 0.029658075318401367, "calib/step_q_w": 0.5608989898989899, "calib/step_q_w_n": 990.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 554.14453125, "completions/mean_terminated_length": 556.3176879882812, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.18666666666666668, "grad_norm": 0.26532062888145447, "learning_rate": 7.222222222222222e-07, "loss": -0.0224, "num_tokens": 45611222.0, "reward": 0.9609375, "reward_std": 0.28407180309295654, "rewards/accuracy_reward_step": 0.46484375, "rewards/format_reward_step": 0.9921875, "step": 175 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.9140625, "calib/ece": 0.4006719367588934, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.039525691699604744, "calib/gap": 0.008097649412353247, "calib/mean_conf": 0.8907905138339921, "calib/mu_c": 0.89491935483871, "calib/mu_w": 0.8868217054263567, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4006719367588934, "calib/std_conf": 0.06436807132161741, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6102705882352941, "calib/step_q_c_n": 850.0, "calib/step_q_gap": 0.06380319693094627, "calib/step_q_w": 0.5464673913043478, "calib/step_q_w_n": 920.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 556.6171875, "completions/mean_terminated_length": 558.800048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.18773333333333334, "grad_norm": 0.22770024836063385, "learning_rate": 6.944444444444446e-07, "loss": 0.0075, "num_tokens": 45881588.0, "reward": 0.978515625, "reward_std": 0.2112065553665161, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.98828125, "step": 176 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.3003187250996017, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": -0.0024726072607261784, "calib/mean_conf": 0.8979282868525899, "calib/mu_c": 0.8969333333333337, "calib/mu_w": 0.8994059405940599, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3003187250996017, "calib/std_conf": 0.01907447233017699, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6147472527472528, "calib/step_q_c_n": 910.0, "calib/step_q_gap": 0.1496460812882539, "calib/step_q_w": 0.46510117145899893, "calib/step_q_w_n": 939.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2455.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 554.67578125, "completions/mean_terminated_length": 561.2529907226562, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.1888, "grad_norm": 0.2137850821018219, "learning_rate": 6.666666666666667e-07, "loss": 0.0007, "num_tokens": 46151225.0, "reward": 1.076171875, "reward_std": 0.22957715392112732, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.98046875, "step": 177 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.5703125, "calib/ece": 0.31807843137254915, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.054901960784313725, "calib/gap": -0.0076066860833226, "calib/mean_conf": 0.9012156862745099, "calib/mu_c": 0.8980536912751682, "calib/mu_w": 0.9056603773584908, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3174901960784315, "calib/std_conf": 0.021136636463723723, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6420581395348838, "calib/step_q_c_n": 860.0, "calib/step_q_gap": 0.06930874780738983, "calib/step_q_w": 0.572749391727494, "calib/step_q_w_n": 822.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 544.83203125, "completions/mean_terminated_length": 544.83203125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.18986666666666666, "grad_norm": 0.27774763107299805, "learning_rate": 6.388888888888889e-07, "loss": 0.0453, "num_tokens": 46420582.0, "reward": 1.080078125, "reward_std": 0.2694709300994873, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 0.99609375, "step": 178 }, { "calib/answer_extract_rate": 0.98046875, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.28916334661354604, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.0002521008403362224, "calib/mean_conf": 0.8987250996015937, "calib/mu_c": 0.8988235294117649, "calib/mu_w": 0.8985714285714287, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28916334661354604, "calib/std_conf": 0.01857385729434993, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6282080329557158, "calib/step_q_c_n": 971.0, "calib/step_q_gap": 0.09612992974790546, "calib/step_q_w": 0.5320781032078103, "calib/step_q_w_n": 717.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2730.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 548.7734375, "completions/mean_terminated_length": 550.925537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 215.0, "epoch": 0.19093333333333334, "grad_norm": 0.25895431637763977, "learning_rate": 6.111111111111112e-07, "loss": 0.037, "num_tokens": 46691140.0, "reward": 1.087890625, "reward_std": 0.27308064699172974, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.98046875, "step": 179 }, { "calib/answer_extract_rate": 0.9765625, "calib/avg_num_step_conf": 7.3984375, "calib/ece": 0.31546184738955846, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.024096385542168676, "calib/gap": -0.0001604774535810538, "calib/mean_conf": 0.8977911646586347, "calib/mu_c": 0.8977241379310346, "calib/mu_w": 0.8978846153846156, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31546184738955846, "calib/std_conf": 0.0190839553461964, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6108915441176471, "calib/step_q_c_n": 1088.0, "calib/step_q_gap": 0.04966325627645607, "calib/step_q_w": 0.561228287841191, "calib/step_q_w_n": 806.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 609.796875, "completions/mean_terminated_length": 617.0277099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.192, "grad_norm": 0.24032698571681976, "learning_rate": 5.833333333333334e-07, "loss": -0.0097, "num_tokens": 46974912.0, "reward": 1.052734375, "reward_std": 0.2881550192832947, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.97265625, "step": 180 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.328125, "calib/ece": 0.4085826771653546, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": 0.008513647642680033, "calib/mean_conf": 0.8967716535433072, "calib/mu_c": 0.9011290322580648, "calib/mu_w": 0.8926153846153848, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4085826771653546, "calib/std_conf": 0.05877867053501822, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6302722772277228, "calib/step_q_c_n": 808.0, "calib/step_q_gap": 0.03709001121787059, "calib/step_q_w": 0.5931822660098522, "calib/step_q_w_n": 812.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2577.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 525.05859375, "completions/mean_terminated_length": 527.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.19306666666666666, "grad_norm": 0.26311588287353516, "learning_rate": 5.555555555555555e-07, "loss": 0.0051, "num_tokens": 47239399.0, "reward": 0.98046875, "reward_std": 0.29605424404144287, "rewards/accuracy_reward_step": 0.484375, "rewards/format_reward_step": 0.9921875, "step": 181 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.359375, "calib/ece": 0.278149606299213, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03543307086614173, "calib/gap": -0.002381640291548748, "calib/mean_conf": 0.8962598425196852, "calib/mu_c": 0.895350318471338, "calib/mu_w": 0.8977319587628867, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.278149606299213, "calib/std_conf": 0.022267773613621172, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.644875406283857, "calib/step_q_c_n": 923.0, "calib/step_q_gap": 0.09663427153208393, "calib/step_q_w": 0.548241134751773, "calib/step_q_w_n": 705.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1650.0, "completions/max_terminated_length": 1650.0, "completions/mean_length": 542.12109375, "completions/mean_terminated_length": 546.3897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.19413333333333332, "grad_norm": 0.2820543646812439, "learning_rate": 5.277777777777779e-07, "loss": -0.0056, "num_tokens": 47508150.0, "reward": 1.10546875, "reward_std": 0.27713069319725037, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 0.984375, "step": 182 }, { "calib/answer_extract_rate": 0.984375, "calib/avg_num_step_conf": 6.92578125, "calib/ece": 0.41074509803921594, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.015864661654135803, "calib/mean_conf": 0.8817254901960786, "calib/mu_c": 0.8900000000000003, "calib/mu_w": 0.8741353383458645, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4070196078431375, "calib/std_conf": 0.12458622943943756, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5766496815286625, "calib/step_q_c_n": 785.0, "calib/step_q_gap": 0.040550390748520626, "calib/step_q_w": 0.5360992907801418, "calib/step_q_w_n": 987.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 590.49609375, "completions/mean_terminated_length": 590.49609375, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.1952, "grad_norm": 0.2631720006465912, "learning_rate": 5.000000000000001e-07, "loss": 0.0362, "num_tokens": 47789805.0, "reward": 0.966796875, "reward_std": 0.3247433304786682, "rewards/accuracy_reward_step": 0.4765625, "rewards/format_reward_step": 0.98046875, "step": 183 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 6.4921875, "calib/ece": 0.2305905511811025, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.0012464985994394828, "calib/mean_conf": 0.8998818897637797, "calib/mu_c": 0.900294117647059, "calib/mu_w": 0.8990476190476195, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2305905511811025, "calib/std_conf": 0.013987685344998382, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.678563484708063, "calib/step_q_c_n": 1079.0, "calib/step_q_gap": 0.06098200957941802, "calib/step_q_w": 0.617581475128645, "calib/step_q_w_n": 583.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1598.0, "completions/max_terminated_length": 1598.0, "completions/mean_length": 522.69140625, "completions/mean_terminated_length": 524.7412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.19626666666666667, "grad_norm": 0.23894599080085754, "learning_rate": 4.7222222222222226e-07, "loss": -0.0032, "num_tokens": 48052702.0, "reward": 1.16015625, "reward_std": 0.24039921164512634, "rewards/accuracy_reward_step": 0.6640625, "rewards/format_reward_step": 0.9921875, "step": 184 }, { "calib/answer_extract_rate": 0.95703125, "calib/avg_num_step_conf": 7.43359375, "calib/ece": 0.3223170731707319, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.07723577235772358, "calib/gap": -0.0016075297941493538, "calib/mean_conf": 0.8995528455284556, "calib/mu_c": 0.8988732394366201, "calib/mu_w": 0.9004807692307695, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3223170731707319, "calib/std_conf": 0.021323182259200085, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6366867469879519, "calib/step_q_c_n": 830.0, "calib/step_q_gap": 0.14737640216036574, "calib/step_q_w": 0.4893103448275862, "calib/step_q_w_n": 1073.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2774.0, "completions/max_terminated_length": 2774.0, "completions/mean_length": 536.65625, "completions/mean_terminated_length": 549.5360107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.19733333333333333, "grad_norm": 0.2608066499233246, "learning_rate": 4.444444444444445e-07, "loss": -0.0132, "num_tokens": 48320814.0, "reward": 1.03125, "reward_std": 0.29780781269073486, "rewards/accuracy_reward_step": 0.5546875, "rewards/format_reward_step": 0.953125, "step": 185 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.8828125, "calib/ece": 0.2933333333333336, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": -0.028536184210526616, "calib/mean_conf": 0.8815686274509805, "calib/mu_c": 0.8709375000000001, "calib/mu_w": 0.8994736842105268, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27372549019607867, "calib/std_conf": 0.1254227760068057, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5934462151394424, "calib/step_q_c_n": 1004.0, "calib/step_q_gap": 0.07409265313416535, "calib/step_q_w": 0.519353562005277, "calib/step_q_w_n": 758.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1705.0, "completions/max_terminated_length": 1705.0, "completions/mean_length": 562.0390625, "completions/mean_terminated_length": 564.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.1984, "grad_norm": 0.23022688925266266, "learning_rate": 4.1666666666666667e-07, "loss": 0.0173, "num_tokens": 48593544.0, "reward": 1.123046875, "reward_std": 0.23529818654060364, "rewards/accuracy_reward_step": 0.625, "rewards/format_reward_step": 0.99609375, "step": 186 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 7.0234375, "calib/ece": 0.3737549407114627, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.07905138339920949, "calib/gap": -0.001211152882205635, "calib/mean_conf": 0.8994466403162058, "calib/mu_c": 0.8988721804511278, "calib/mu_w": 0.9000833333333335, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3737549407114627, "calib/std_conf": 0.01989324568516636, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.5791103603603603, "calib/step_q_c_n": 888.0, "calib/step_q_gap": 0.004967503217503166, "calib/step_q_w": 0.5741428571428572, "calib/step_q_w_n": 910.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 560.66015625, "completions/mean_terminated_length": 562.85888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.19946666666666665, "grad_norm": 0.24886764585971832, "learning_rate": 3.8888888888888895e-07, "loss": 0.0519, "num_tokens": 48862425.0, "reward": 1.009765625, "reward_std": 0.2750778794288635, "rewards/accuracy_reward_step": 0.51953125, "rewards/format_reward_step": 0.98046875, "step": 187 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.9375, "calib/ece": 0.3312500000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0546875, "calib/gap": -0.005673723536737341, "calib/mean_conf": 0.8992187500000002, "calib/mu_c": 0.8967808219178085, "calib/mu_w": 0.9024545454545458, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3300781250000001, "calib/std_conf": 0.01639880783586111, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6178296988577363, "calib/step_q_c_n": 963.0, "calib/step_q_gap": -0.009353572974982072, "calib/step_q_w": 0.6271832718327184, "calib/step_q_w_n": 813.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2013.0, "completions/max_terminated_length": 2013.0, "completions/mean_length": 552.70703125, "completions/mean_terminated_length": 554.8745727539062, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.20053333333333334, "grad_norm": 0.2365780621767044, "learning_rate": 3.611111111111111e-07, "loss": 0.018, "num_tokens": 49131798.0, "reward": 1.06640625, "reward_std": 0.2518237829208374, "rewards/accuracy_reward_step": 0.5703125, "rewards/format_reward_step": 0.9921875, "step": 188 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.2734375, "calib/ece": 0.2853125000000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.03515625, "calib/gap": -0.0008106543138390876, "calib/mean_conf": 0.8985937500000003, "calib/mu_c": 0.8982802547770703, "calib/mu_w": 0.8990909090909094, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2853125000000002, "calib/std_conf": 0.018444781401184995, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6221251348435815, "calib/step_q_c_n": 927.0, "calib/step_q_gap": 0.04616048094078329, "calib/step_q_w": 0.5759646539027982, "calib/step_q_w_n": 679.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 529.71875, "completions/mean_terminated_length": 531.7960815429688, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.2016, "grad_norm": 0.2585393488407135, "learning_rate": 3.3333333333333335e-07, "loss": 0.0195, "num_tokens": 49398982.0, "reward": 1.11328125, "reward_std": 0.2384297251701355, "rewards/accuracy_reward_step": 0.61328125, "rewards/format_reward_step": 1.0, "step": 189 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.9453125, "calib/ece": 0.33152343750000024, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.046875, "calib/gap": 0.0004802733768247691, "calib/mean_conf": 0.8979296875000002, "calib/mu_c": 0.8981379310344829, "calib/mu_w": 0.8976576576576581, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33152343750000024, "calib/std_conf": 0.018833432537706554, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.5846989141164858, "calib/step_q_c_n": 1013.0, "calib/step_q_gap": 0.032894992547858326, "calib/step_q_w": 0.5518039215686275, "calib/step_q_w_n": 765.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1333.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 573.59765625, "completions/mean_terminated_length": 575.8471069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.20266666666666666, "grad_norm": 0.2151663452386856, "learning_rate": 3.055555555555556e-07, "loss": 0.0058, "num_tokens": 49675239.0, "reward": 1.06640625, "reward_std": 0.2194880247116089, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 1.0, "step": 190 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.80078125, "calib/ece": 0.38462745098039236, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.06274509803921569, "calib/gap": 0.003984615384615031, "calib/mean_conf": 0.8944313725490197, "calib/mu_c": 0.8963846153846154, "calib/mu_w": 0.8924000000000004, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38462745098039236, "calib/std_conf": 0.04210596106182162, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6451715976331361, "calib/step_q_c_n": 845.0, "calib/step_q_gap": 0.07548409763313613, "calib/step_q_w": 0.5696875, "calib/step_q_w_n": 896.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1871.0, "completions/max_terminated_length": 1871.0, "completions/mean_length": 535.6328125, "completions/mean_terminated_length": 537.7333374023438, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.20373333333333332, "grad_norm": 0.2562410831451416, "learning_rate": 2.7777777777777776e-07, "loss": -0.0141, "num_tokens": 49940337.0, "reward": 1.005859375, "reward_std": 0.2665815055370331, "rewards/accuracy_reward_step": 0.5078125, "rewards/format_reward_step": 0.99609375, "step": 191 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.07421875, "calib/ece": 0.31270588235294133, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.08235294117647059, "calib/gap": 0.003866666666666685, "calib/mean_conf": 0.9009411764705882, "calib/mu_c": 0.9025333333333336, "calib/mu_w": 0.898666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31270588235294133, "calib/std_conf": 0.021618267298025014, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6592729439809298, "calib/step_q_c_n": 839.0, "calib/step_q_gap": 0.0744824411876337, "calib/step_q_w": 0.5847905027932961, "calib/step_q_w_n": 716.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1958.0, "completions/max_terminated_length": 1958.0, "completions/mean_length": 546.18359375, "completions/mean_terminated_length": 548.3255004882812, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.2048, "grad_norm": 0.23097290098667145, "learning_rate": 2.5000000000000004e-07, "loss": 0.0133, "num_tokens": 50208944.0, "reward": 1.083984375, "reward_std": 0.2510668635368347, "rewards/accuracy_reward_step": 0.5859375, "rewards/format_reward_step": 0.99609375, "step": 192 }, { "calib/answer_extract_rate": 1.0, "calib/avg_num_step_conf": 6.25390625, "calib/ece": 0.3089453125000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.07421875, "calib/gap": 0.010991030546321334, "calib/mean_conf": 0.8909765625000002, "calib/mu_c": 0.8955704697986581, "calib/mu_w": 0.8845794392523367, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3089453125000002, "calib/std_conf": 0.052226065816636, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.663194130925508, "calib/step_q_c_n": 886.0, "calib/step_q_gap": 0.03484448057585765, "calib/step_q_w": 0.6283496503496504, "calib/step_q_w_n": 715.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 521.45703125, "completions/mean_terminated_length": 523.5020141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.20586666666666667, "grad_norm": 0.26548540592193604, "learning_rate": 2.2222222222222224e-07, "loss": 0.0236, "num_tokens": 50471957.0, "reward": 1.08203125, "reward_std": 0.305894136428833, "rewards/accuracy_reward_step": 0.58203125, "rewards/format_reward_step": 1.0, "step": 193 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.98046875, "calib/ece": 0.37913385826771684, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07086614173228346, "calib/gap": 0.005250418916402655, "calib/mean_conf": 0.891732283464567, "calib/mu_c": 0.8942748091603054, "calib/mu_w": 0.8890243902439028, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37755905511811055, "calib/std_conf": 0.06434521179242914, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.616088560885609, "calib/step_q_c_n": 813.0, "calib/step_q_gap": 0.008372683448283125, "calib/step_q_w": 0.6077158774373259, "calib/step_q_w_n": 718.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 490.5078125, "completions/mean_terminated_length": 492.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.20693333333333333, "grad_norm": 0.22926728427410126, "learning_rate": 1.9444444444444447e-07, "loss": -0.0175, "num_tokens": 50727279.0, "reward": 1.0078125, "reward_std": 0.22462846338748932, "rewards/accuracy_reward_step": 0.51171875, "rewards/format_reward_step": 0.9921875, "step": 194 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.3208661417322838, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.051181102362204724, "calib/gap": -0.0008417572636532089, "calib/mean_conf": 0.8996062992125985, "calib/mu_c": 0.8992517006802723, "calib/mu_w": 0.9000934579439255, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3208661417322838, "calib/std_conf": 0.01993697055637064, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6379933481152994, "calib/step_q_c_n": 902.0, "calib/step_q_gap": 0.038135528210086034, "calib/step_q_w": 0.5998578199052134, "calib/step_q_w_n": 633.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1642.0, "completions/max_terminated_length": 1642.0, "completions/mean_length": 515.328125, "completions/mean_terminated_length": 517.3490600585938, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.208, "grad_norm": 0.2591729164123535, "learning_rate": 1.6666666666666668e-07, "loss": 0.0099, "num_tokens": 50988995.0, "reward": 1.068359375, "reward_std": 0.25752705335617065, "rewards/accuracy_reward_step": 0.57421875, "rewards/format_reward_step": 0.98828125, "step": 195 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 5.734375, "calib/ece": 0.28435294117647086, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.01677419354838705, "calib/mean_conf": 0.8921960784313727, "calib/mu_c": 0.8987741935483875, "calib/mu_w": 0.8820000000000005, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28435294117647086, "calib/std_conf": 0.07015305944381316, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6559217877094973, "calib/step_q_c_n": 895.0, "calib/step_q_gap": 0.032710618425029536, "calib/step_q_w": 0.6232111692844677, "calib/step_q_w_n": 573.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2382.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 460.08203125, "completions/mean_terminated_length": 460.08203125, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.20906666666666668, "grad_norm": 0.2433953881263733, "learning_rate": 1.3888888888888888e-07, "loss": 0.0054, "num_tokens": 51233128.0, "reward": 1.103515625, "reward_std": 0.18032386898994446, "rewards/accuracy_reward_step": 0.60546875, "rewards/format_reward_step": 0.99609375, "step": 196 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.0625, "calib/ece": 0.38945098039215703, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": 0.006306755260243269, "calib/mean_conf": 0.8953333333333334, "calib/mu_c": 0.8984496124031008, "calib/mu_w": 0.8921428571428576, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38945098039215703, "calib/std_conf": 0.05834806536660958, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.6825699067909454, "calib/step_q_c_n": 751.0, "calib/step_q_gap": 0.05902433875099533, "calib/step_q_w": 0.6235455680399501, "calib/step_q_w_n": 801.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2409.0, "completions/max_terminated_length": 2409.0, "completions/mean_length": 535.27734375, "completions/mean_terminated_length": 535.27734375, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 0.21013333333333334, "grad_norm": 0.2739056944847107, "learning_rate": 1.1111111111111112e-07, "loss": 0.0172, "num_tokens": 51499023.0, "reward": 1.001953125, "reward_std": 0.28341710567474365, "rewards/accuracy_reward_step": 0.50390625, "rewards/format_reward_step": 0.99609375, "step": 197 }, { "calib/answer_extract_rate": 0.99609375, "calib/avg_num_step_conf": 6.0546875, "calib/ece": 0.2997647058823531, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.0005882352941175562, "calib/mean_conf": 0.899764705882353, "calib/mu_c": 0.9000000000000002, "calib/mu_w": 0.8994117647058827, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2997647058823531, "calib/std_conf": 0.016754967932040052, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6448475289169295, "calib/step_q_c_n": 951.0, "calib/step_q_gap": 0.02696772925081936, "calib/step_q_w": 0.6178797996661102, "calib/step_q_w_n": 599.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 489.25390625, "completions/mean_terminated_length": 489.25390625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.2112, "grad_norm": 0.26366642117500305, "learning_rate": 8.333333333333334e-08, "loss": 0.033, "num_tokens": 51753464.0, "reward": 1.09375, "reward_std": 0.23958630859851837, "rewards/accuracy_reward_step": 0.59765625, "rewards/format_reward_step": 0.9921875, "step": 198 }, { "calib/answer_extract_rate": 0.98828125, "calib/avg_num_step_conf": 6.60546875, "calib/ece": 0.34881422924901206, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05533596837944664, "calib/gap": -0.03208812260536431, "calib/mean_conf": 0.8843873517786562, "calib/mu_c": 0.8706896551724138, "calib/mu_w": 0.9027777777777781, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33003952569169986, "calib/std_conf": 0.11989834362679651, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.6364526659412406, "calib/step_q_c_n": 919.0, "calib/step_q_gap": 0.042475982003416735, "calib/step_q_w": 0.5939766839378239, "calib/step_q_w_n": 772.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2360.0, "completions/max_terminated_length": 2360.0, "completions/mean_length": 576.015625, "completions/mean_terminated_length": 578.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.21226666666666666, "grad_norm": 0.27529919147491455, "learning_rate": 5.555555555555556e-08, "loss": 0.0123, "num_tokens": 52028932.0, "reward": 1.05859375, "reward_std": 0.29672741889953613, "rewards/accuracy_reward_step": 0.56640625, "rewards/format_reward_step": 0.984375, "step": 199 }, { "calib/answer_extract_rate": 0.9921875, "calib/avg_num_step_conf": 5.5546875, "calib/ece": 0.2839920948616605, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.06719367588932806, "calib/gap": 0.0021319058947925074, "calib/mean_conf": 0.9005928853754942, "calib/mu_c": 0.9014102564102566, "calib/mu_w": 0.8992783505154641, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2839920948616605, "calib/std_conf": 0.015680993151585224, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.6641888619854723, "calib/step_q_c_n": 826.0, "calib/step_q_gap": 0.0346083250727206, "calib/step_q_w": 0.6295805369127517, "calib/step_q_w_n": 596.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2556.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 537.5, "completions/mean_terminated_length": 539.6078491210938, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.21333333333333335, "grad_norm": 0.24357278645038605, "learning_rate": 2.777777777777778e-08, "loss": -0.0128, "num_tokens": 52298388.0, "reward": 1.103515625, "reward_std": 0.20465883612632751, "rewards/accuracy_reward_step": 0.609375, "rewards/format_reward_step": 0.98828125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.020373184509808198, "train_runtime": 5912.1429, "train_samples_per_second": 8.66, "train_steps_per_second": 0.034 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 52298388, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }