{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.5285714285714285, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.9285714285714286, "calib/gap": 0.02833333333333321, "calib/mean_conf": 0.9571428571428572, "calib/mu_c": 0.9733333333333333, "calib/mu_w": 0.9450000000000001, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.5285714285714285, "calib/std_conf": 0.033896601479156206, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 658.8203125, "completions/mean_terminated_length": 714.6525268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.09911441057920456, "learning_rate": 2.5000000000000004e-07, "loss": 0.0164, "num_tokens": 276242.0, "reward": 0.06046927720308304, "reward_std": 0.11048541963100433, "rewards/accuracy_reward_step": 0.02734375, "rewards/brier_reward_group": 0.015035448595881462, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.03152916580438614, "step": 1 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.1851851851851852, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.2141666666666665, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.01666666666666672, "calib/mean_conf": 0.9641666666666665, "calib/mu_c": 0.9599999999999999, "calib/mu_w": 0.9766666666666666, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.2141666666666665, "calib/std_conf": 0.014409680388158833, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2927.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 749.54296875, "completions/mean_terminated_length": 820.0128784179688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.10169611871242523, "learning_rate": 5.000000000000001e-07, "loss": -0.0156, "num_tokens": 571413.0, "reward": 0.07521682232618332, "reward_std": 0.16352277994155884, "rewards/accuracy_reward_step": 0.0390625, "rewards/brier_reward_group": 0.01766689494252205, "rewards/format_reward_step": 0.046875, "rewards/stepwise_brier_reward": 0.03320039063692093, "step": 2 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 1.0, "calib/avg_num_step_conf": 0.15625, "calib/ece": 0.15399999999999991, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.04249999999999987, "calib/mean_conf": 0.954, "calib/mu_c": 0.9624999999999999, "calib/mu_w": 0.92, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.15399999999999991, "calib/std_conf": 0.017435595774162666, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 757.26171875, "completions/mean_terminated_length": 801.0701904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.06682799756526947, "learning_rate": 7.5e-07, "loss": -0.01, "num_tokens": 870528.0, "reward": 0.03093426302075386, "reward_std": 0.06631891429424286, "rewards/accuracy_reward_step": 0.015625, "rewards/brier_reward_group": 0.007611426059156656, "rewards/format_reward_step": 0.01953125, "rewards/stepwise_brier_reward": 0.014563126489520073, "step": 3 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.275, "calib/avg_num_step_conf": 0.15234375, "calib/ece": 0.4111111111111112, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.010499999999999954, "calib/mean_conf": 0.9666666666666668, "calib/mu_c": 0.962, "calib/mu_w": 0.9724999999999999, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.4111111111111112, "calib/std_conf": 0.016996731711975965, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 3052.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 641.2734375, "completions/mean_terminated_length": 736.17041015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.011850867420434952, "learning_rate": 1.0000000000000002e-06, "loss": -0.005, "num_tokens": 1140862.0, "reward": 0.03999038413167, "reward_std": 0.09721466153860092, "rewards/accuracy_reward_step": 0.01953125, "rewards/brier_reward_group": 0.007201758213341236, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.019947277382016182, "step": 4 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.7, "calib/avg_num_step_conf": 0.15625, "calib/ece": 0.6800000000000002, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.020000000000000018, "calib/mean_conf": 0.9657142857142859, "calib/mu_c": 0.98, "calib/mu_w": 0.96, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.6800000000000002, "calib/std_conf": 0.0328881840949181, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3019.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 707.04296875, "completions/mean_terminated_length": 793.872802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.010113976895809174, "learning_rate": 1.25e-06, "loss": -0.0062, "num_tokens": 1428553.0, "reward": 0.02361677773296833, "reward_std": 0.06166065111756325, "rewards/accuracy_reward_step": 0.0078125, "rewards/brier_reward_group": 0.002863086061552167, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.013479027897119522, "step": 5 }, { "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.53125, "calib/avg_num_step_conf": 0.41015625, "calib/ece": 0.5464999999999997, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.003750000000000142, "calib/mean_conf": 0.9464999999999998, "calib/mu_c": 0.94875, "calib/mu_w": 0.9449999999999998, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.5464999999999997, "calib/std_conf": 0.036779749863205966, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2886.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 652.85546875, "completions/mean_terminated_length": 711.1957397460938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.02027568221092224, "learning_rate": 1.5e-06, "loss": -0.013, "num_tokens": 1701636.0, "reward": 0.0833994597196579, "reward_std": 0.138918936252594, "rewards/accuracy_reward_step": 0.03125, "rewards/brier_reward_group": 0.025349901989102364, "rewards/format_reward_step": 0.0703125, "rewards/stepwise_brier_reward": 0.04262293130159378, "step": 6 }, { "calib/answer_extract_rate": 0.046875, "calib/auroc": 0.2833333333333333, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.33909090909090905, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.9090909090909091, "calib/gap": 0.15866666666666662, "calib/mean_conf": 0.8845454545454546, "calib/mu_c": 0.9566666666666667, "calib/mu_w": 0.798, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.33909090909090905, "calib/std_conf": 0.26475936368928865, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3071.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 812.89453125, "completions/mean_terminated_length": 896.987060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.007466666666666667, "grad_norm": 0.013131404295563698, "learning_rate": 1.75e-06, "loss": -0.0002, "num_tokens": 2017161.0, "reward": 0.05258158966898918, "reward_std": 0.13502581417560577, "rewards/accuracy_reward_step": 0.0234375, "rewards/brier_reward_group": 0.013036327436566353, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.02541504241526127, "step": 7 }, { "calib/answer_extract_rate": 0.05078125, "calib/auroc": 0.36904761904761907, "calib/avg_num_step_conf": 0.21484375, "calib/ece": 0.49076923076923085, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.8461538461538461, "calib/gap": -0.004285714285714337, "calib/mean_conf": 0.9523076923076924, "calib/mu_c": 0.9500000000000001, "calib/mu_w": 0.9542857142857144, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.49076923076923085, "calib/std_conf": 0.0257663603452988, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3029.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 727.203125, "completions/mean_terminated_length": 792.1871948242188, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.008533333333333334, "grad_norm": 0.016019297763705254, "learning_rate": 2.0000000000000003e-06, "loss": 0.0123, "num_tokens": 2309837.0, "reward": 0.050940170884132385, "reward_std": 0.11515554040670395, "rewards/accuracy_reward_step": 0.0234375, "rewards/brier_reward_group": 0.008852246217429638, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.02303342893719673, "step": 8 }, { "calib/answer_extract_rate": 0.01171875, "calib/avg_num_step_conf": 0.1171875, "calib/ece": 0.9525, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.75, "calib/mean_conf": 0.9525, "calib/mu_c": NaN, "calib/mu_w": 0.9525, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.9525, "calib/std_conf": 0.04322904116447644, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 706.16015625, "completions/mean_terminated_length": 775.866943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.00911067146807909, "learning_rate": 2.25e-06, "loss": -0.0081, "num_tokens": 2598150.0, "reward": 0.007569408044219017, "reward_std": 0.021409519016742706, "rewards/accuracy_reward_step": 0.0, "rewards/brier_reward_group": 0.001342187519185245, "rewards/format_reward_step": 0.01171875, "rewards/stepwise_brier_reward": 0.0054979450069367886, "step": 9 }, { "calib/answer_extract_rate": 0.0390625, "calib/auroc": 0.7916666666666667, "calib/avg_num_step_conf": 0.26171875, "calib/ece": 0.5619999999999999, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": 0.029999999999999916, "calib/mean_conf": 0.962, "calib/mu_c": 0.98, "calib/mu_w": 0.9500000000000001, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.5619999999999999, "calib/std_conf": 0.029933259094191523, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 732.6484375, "completions/mean_terminated_length": 798.119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.014519107528030872, "learning_rate": 2.5e-06, "loss": -0.0183, "num_tokens": 2892508.0, "reward": 0.039961472153663635, "reward_std": 0.08913610875606537, "rewards/accuracy_reward_step": 0.015625, "rewards/brier_reward_group": 0.00905166007578373, "rewards/format_reward_step": 0.03515625, "rewards/stepwise_brier_reward": 0.017981717362999916, "step": 10 }, { "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.36904761904761907, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.4615384615384616, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6923076923076923, "calib/gap": 0.09023809523809523, "calib/mean_conf": 0.8830769230769231, "calib/mu_c": 0.9316666666666666, "calib/mu_w": 0.8414285714285714, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.44153846153846166, "calib/std_conf": 0.22972842211836586, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 749.4765625, "completions/mean_terminated_length": 827.0086059570312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.010001948103308678, "learning_rate": 2.7500000000000004e-06, "loss": -0.0119, "num_tokens": 3188854.0, "reward": 0.05315268039703369, "reward_std": 0.09994122385978699, "rewards/accuracy_reward_step": 0.0234375, "rewards/brier_reward_group": 0.014543652534484863, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.026192057877779007, "step": 11 }, { "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.42857142857142866, "calib/avg_num_step_conf": 0.28515625, "calib/ece": 0.41066666666666657, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.0128571428571429, "calib/mean_conf": 0.9440000000000001, "calib/mu_c": 0.95, "calib/mu_w": 0.937142857142857, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.41066666666666657, "calib/std_conf": 0.04644710252893427, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2939.0, "completions/max_terminated_length": 2939.0, "completions/mean_length": 703.30859375, "completions/mean_terminated_length": 756.5000610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0128, "grad_norm": 0.019046172499656677, "learning_rate": 3e-06, "loss": -0.0067, "num_tokens": 3473077.0, "reward": 0.06333271414041519, "reward_std": 0.14968423545360565, "rewards/accuracy_reward_step": 0.03125, "rewards/brier_reward_group": 0.015546679496765137, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.02684667706489563, "step": 12 }, { "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.33916083916083917, "calib/avg_num_step_conf": 0.6484375, "calib/ece": 0.4321666666666666, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.9583333333333334, "calib/gap": -0.02109090909090916, "calib/mean_conf": 0.9596666666666666, "calib/mu_c": 0.95, "calib/mu_w": 0.9710909090909091, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.42508333333333326, "calib/std_conf": 0.03698047532895644, "calib/step_conf_rate": 0.09765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 769.18359375, "completions/mean_terminated_length": 827.357177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.013866666666666666, "grad_norm": 0.023658715188503265, "learning_rate": 3.2500000000000002e-06, "loss": -0.0416, "num_tokens": 3774580.0, "reward": 0.10999283194541931, "reward_std": 0.20208735764026642, "rewards/accuracy_reward_step": 0.05078125, "rewards/brier_reward_group": 0.029942620545625687, "rewards/format_reward_step": 0.078125, "rewards/stepwise_brier_reward": 0.05065370351076126, "step": 13 }, { "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.42499999999999993, "calib/avg_num_step_conf": 0.4921875, "calib/ece": 0.41, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": 0.08399999999999996, "calib/mean_conf": 0.9099999999999999, "calib/mu_c": 0.952, "calib/mu_w": 0.868, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.41, "calib/std_conf": 0.20997618912629118, "calib/step_conf_rate": 0.08984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 683.984375, "completions/mean_terminated_length": 758.0086669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.014067032374441624, "learning_rate": 3.5e-06, "loss": -0.0027, "num_tokens": 4055080.0, "reward": 0.0887521356344223, "reward_std": 0.19326013326644897, "rewards/accuracy_reward_step": 0.0390625, "rewards/brier_reward_group": 0.023042481392621994, "rewards/format_reward_step": 0.06640625, "rewards/stepwise_brier_reward": 0.042903535068035126, "step": 14 }, { "calib/answer_extract_rate": 0.14453125, "calib/auroc": 0.30000000000000004, "calib/avg_num_step_conf": 0.796875, "calib/ece": 0.5422058823529413, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.11328125, "calib/frac_conf_gt_0.9": 0.9117647058823529, "calib/gap": -0.018892857142857045, "calib/mean_conf": 0.9539705882352942, "calib/mu_c": 0.942857142857143, "calib/mu_w": 0.96175, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.13671875, "calib/pce": 0.5422058823529413, "calib/std_conf": 0.026395727477486648, "calib/step_conf_rate": 0.13671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 627.1328125, "completions/mean_terminated_length": 686.0940551757812, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.016, "grad_norm": 0.022355226799845695, "learning_rate": 3.7500000000000005e-06, "loss": -0.0183, "num_tokens": 4323506.0, "reward": 0.13517887890338898, "reward_std": 0.30949950218200684, "rewards/accuracy_reward_step": 0.0546875, "rewards/brier_reward_group": 0.024587303400039673, "rewards/format_reward_step": 0.11328125, "rewards/stepwise_brier_reward": 0.07081569731235504, "step": 15 }, { "calib/answer_extract_rate": 0.1796875, "calib/auroc": 0.43353174603174605, "calib/avg_num_step_conf": 1.34375, "calib/ece": 0.5682826086956521, "calib/final_conf_rate": 0.1796875, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.9130434782608695, "calib/gap": -0.006623015873016058, "calib/mean_conf": 0.959586956521739, "calib/mu_c": 0.9555555555555555, "calib/mu_w": 0.9621785714285715, "calib/nonempty_final_conf_rate": 0.1796875, "calib/nonempty_reasoning_rate": 0.2109375, "calib/nonempty_step_conf_rate": 0.1953125, "calib/pce": 0.5682826086956521, "calib/std_conf": 0.026132252342199097, "calib/step_conf_rate": 0.1953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3051.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 814.40234375, "completions/mean_terminated_length": 883.4194946289062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.023184077814221382, "learning_rate": 4.000000000000001e-06, "loss": -0.0113, "num_tokens": 4640841.0, "reward": 0.18745586276054382, "reward_std": 0.32550889253616333, "rewards/accuracy_reward_step": 0.0703125, "rewards/brier_reward_group": 0.04953538626432419, "rewards/format_reward_step": 0.16015625, "rewards/stepwise_brier_reward": 0.09872552752494812, "step": 16 }, { "calib/answer_extract_rate": 0.40234375, "calib/auroc": 0.4261363636363636, "calib/avg_num_step_conf": 2.6953125, "calib/ece": 0.39964999999999995, "calib/final_conf_rate": 0.390625, "calib/format_rate": 0.33984375, "calib/frac_conf_gt_0.9": 0.93, "calib/gap": -0.0004220779220778237, "calib/mean_conf": 0.95965, "calib/mu_c": 0.9594642857142857, "calib/mu_w": 0.9598863636363635, "calib/nonempty_final_conf_rate": 0.390625, "calib/nonempty_reasoning_rate": 0.4453125, "calib/nonempty_step_conf_rate": 0.390625, "calib/pce": 0.39964999999999995, "calib/std_conf": 0.039269931245165174, "calib/step_conf_rate": 0.390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 724.234375, "completions/mean_terminated_length": 753.6747436523438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.0288087148219347, "learning_rate": 4.25e-06, "loss": -0.0327, "num_tokens": 4929773.0, "reward": 0.5019322037696838, "reward_std": 0.60835862159729, "rewards/accuracy_reward_step": 0.22265625, "rewards/brier_reward_group": 0.19423416256904602, "rewards/format_reward_step": 0.33984375, "rewards/stepwise_brier_reward": 0.24318215250968933, "step": 17 }, { "calib/answer_extract_rate": 0.515625, "calib/auroc": 0.48233430799220267, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.36853846153846154, "calib/final_conf_rate": 0.5078125, "calib/format_rate": 0.4921875, "calib/frac_conf_gt_0.9": 0.9076923076923077, "calib/gap": 0.012997076023391507, "calib/mean_conf": 0.9531538461538461, "calib/mu_c": 0.9585526315789471, "calib/mu_w": 0.9455555555555556, "calib/nonempty_final_conf_rate": 0.5078125, "calib/nonempty_reasoning_rate": 0.57421875, "calib/nonempty_step_conf_rate": 0.55859375, "calib/pce": 0.36853846153846154, "calib/std_conf": 0.05547049964946183, "calib/step_conf_rate": 0.55859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3071.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 679.6015625, "completions/mean_terminated_length": 715.9588012695312, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.0192, "grad_norm": 0.023622112348675728, "learning_rate": 4.5e-06, "loss": 0.0538, "num_tokens": 5214471.0, "reward": 0.71192467212677, "reward_std": 0.6083460450172424, "rewards/accuracy_reward_step": 0.3046875, "rewards/brier_reward_group": 0.3079518675804138, "rewards/format_reward_step": 0.4921875, "rewards/stepwise_brier_reward": 0.33662158250808716, "step": 18 }, { "calib/answer_extract_rate": 0.78125, "calib/auroc": 0.5199610016420361, "calib/avg_num_step_conf": 6.04296875, "calib/ece": 0.5412095, "calib/final_conf_rate": 0.78125, "calib/format_rate": 0.734375, "calib/frac_conf_gt_0.9": 0.93, "calib/gap": -0.0034010262725783225, "calib/mean_conf": 0.9522095, "calib/mu_c": 0.9502369047619046, "calib/mu_w": 0.9536379310344829, "calib/nonempty_final_conf_rate": 0.78125, "calib/nonempty_reasoning_rate": 0.87890625, "calib/nonempty_step_conf_rate": 0.86328125, "calib/pce": 0.5367095000000001, "calib/std_conf": 0.07131394085135108, "calib/step_conf_rate": 0.86328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2768.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 475.46484375, "completions/mean_terminated_length": 477.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.020266666666666665, "grad_norm": 0.02240210771560669, "learning_rate": 4.75e-06, "loss": -0.0198, "num_tokens": 5440950.0, "reward": 0.9314632415771484, "reward_std": 0.6002488732337952, "rewards/accuracy_reward_step": 0.3359375, "rewards/brier_reward_group": 0.41432082653045654, "rewards/format_reward_step": 0.734375, "rewards/stepwise_brier_reward": 0.49903208017349243, "step": 19 }, { "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.4548149905123339, "calib/avg_num_step_conf": 6.85546875, "calib/ece": 0.49933539823008855, "calib/final_conf_rate": 0.8828125, "calib/format_rate": 0.8359375, "calib/frac_conf_gt_0.9": 0.8938053097345132, "calib/gap": 0.003348387096774208, "calib/mean_conf": 0.9506628318584072, "calib/mu_c": 0.9525, "calib/mu_w": 0.9491516129032258, "calib/nonempty_final_conf_rate": 0.8828125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.49933539823008855, "calib/std_conf": 0.05559933897660515, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2937.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 455.4453125, "completions/mean_terminated_length": 457.2314147949219, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.021333333333333333, "grad_norm": 0.03255808353424072, "learning_rate": 5e-06, "loss": 0.0156, "num_tokens": 5662416.0, "reward": 1.0877689123153687, "reward_std": 0.5388917922973633, "rewards/accuracy_reward_step": 0.40625, "rewards/brier_reward_group": 0.4922211170196533, "rewards/format_reward_step": 0.8359375, "rewards/stepwise_brier_reward": 0.5619795918464661, "step": 20 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5604197067611701, "calib/avg_num_step_conf": 7.26171875, "calib/ece": 0.4510438154166667, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.8791666666666667, "calib/gap": 0.025455469606003867, "calib/mean_conf": 0.9385438154166666, "calib/mu_c": 0.9515897435897436, "calib/mu_w": 0.9261342739837397, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.4510438154166667, "calib/std_conf": 0.09832152328578064, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1987.0, "completions/max_terminated_length": 1987.0, "completions/mean_length": 442.390625, "completions/mean_terminated_length": 444.1255187988281, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.0224, "grad_norm": 0.04002515599131584, "learning_rate": 4.9722222222222224e-06, "loss": -0.0188, "num_tokens": 5878628.0, "reward": 1.235422134399414, "reward_std": 0.5283094644546509, "rewards/accuracy_reward_step": 0.4609375, "rewards/brier_reward_group": 0.6382216215133667, "rewards/format_reward_step": 0.91015625, "rewards/stepwise_brier_reward": 0.6394045352935791, "step": 21 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5054565547727586, "calib/avg_num_step_conf": 7.34375, "calib/ece": 0.41908713692946054, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.8838174273858921, "calib/gap": 0.005586752313855636, "calib/mean_conf": 0.946058091286307, "calib/mu_c": 0.9487007874015749, "calib/mu_w": 0.9431140350877193, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.41908713692946054, "calib/std_conf": 0.050211386630864475, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 401.01171875, "completions/mean_terminated_length": 405.7668151855469, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.023466666666666667, "grad_norm": 0.014912611804902554, "learning_rate": 4.944444444444445e-06, "loss": -0.0018, "num_tokens": 6083103.0, "reward": 1.2581007480621338, "reward_std": 0.44688111543655396, "rewards/accuracy_reward_step": 0.5, "rewards/brier_reward_group": 0.6115713119506836, "rewards/format_reward_step": 0.91015625, "rewards/stepwise_brier_reward": 0.6005195379257202, "step": 22 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5582003746528001, "calib/avg_num_step_conf": 8.078125, "calib/ece": 0.3934, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.844, "calib/gap": 0.010636909760351254, "calib/mean_conf": 0.93636, "calib/mu_c": 0.9411678832116788, "calib/mu_w": 0.9305309734513275, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.39088, "calib/std_conf": 0.07596808803701723, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1372.0, "completions/max_terminated_length": 1372.0, "completions/mean_length": 438.84765625, "completions/mean_terminated_length": 445.8135070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.024533333333333334, "grad_norm": 0.01338372752070427, "learning_rate": 4.9166666666666665e-06, "loss": -0.0124, "num_tokens": 6299384.0, "reward": 1.3480709791183472, "reward_std": 0.4616602957248688, "rewards/accuracy_reward_step": 0.53515625, "rewards/brier_reward_group": 0.7146314382553101, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.6307774782180786, "step": 23 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5213293650793651, "calib/avg_num_step_conf": 8.30078125, "calib/ece": 0.520722891566265, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8594377510040161, "calib/gap": 0.009337301587301505, "calib/mean_conf": 0.9424096385542169, "calib/mu_c": 0.9478095238095239, "calib/mu_w": 0.9384722222222224, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.520722891566265, "calib/std_conf": 0.06085581804420343, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 461.5625, "completions/mean_terminated_length": 468.888916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.0256, "grad_norm": 0.016162144020199776, "learning_rate": 4.888888888888889e-06, "loss": -0.0411, "num_tokens": 6522056.0, "reward": 1.1937477588653564, "reward_std": 0.47592926025390625, "rewards/accuracy_reward_step": 0.41015625, "rewards/brier_reward_group": 0.6197054386138916, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.5927855372428894, "step": 24 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5154220779220778, "calib/avg_num_step_conf": 8.546875, "calib/ece": 0.38333333333333325, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8470588235294118, "calib/gap": -0.00196553446553438, "calib/mean_conf": 0.9392549019607843, "calib/mu_c": 0.9383916083916084, "calib/mu_w": 0.9403571428571428, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3809019607843136, "calib/std_conf": 0.03940530627016226, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2528.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 468.71484375, "completions/mean_terminated_length": 474.2727355957031, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.02666666666666667, "grad_norm": 0.013917172327637672, "learning_rate": 4.861111111111111e-06, "loss": -0.0145, "num_tokens": 6745271.0, "reward": 1.4023184776306152, "reward_std": 0.31770461797714233, "rewards/accuracy_reward_step": 0.55859375, "rewards/brier_reward_group": 0.720646858215332, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.662064790725708, "step": 25 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5097145122918318, "calib/avg_num_step_conf": 8.06640625, "calib/ece": 0.32644268774703555, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8656126482213439, "calib/gap": 0.00112212529738287, "calib/mean_conf": 0.9368774703557312, "calib/mu_c": 0.9373076923076921, "calib/mu_w": 0.9361855670103092, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3233596837944664, "calib/std_conf": 0.05741579851654595, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2298.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 448.64453125, "completions/mean_terminated_length": 453.9644470214844, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.027733333333333332, "grad_norm": 0.009529990144073963, "learning_rate": 4.833333333333333e-06, "loss": -0.0049, "num_tokens": 6965364.0, "reward": 1.4513397216796875, "reward_std": 0.3023107051849365, "rewards/accuracy_reward_step": 0.609375, "rewards/brier_reward_group": 0.7412692308425903, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6578395962715149, "step": 26 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4055364204617936, "calib/avg_num_step_conf": 8.36328125, "calib/ece": 0.3948605577689242, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.7649402390438247, "calib/gap": -0.009161245056767298, "calib/mean_conf": 0.9267330677290837, "calib/mu_c": 0.9224626865671642, "calib/mu_w": 0.9316239316239315, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3938645418326692, "calib/std_conf": 0.0453196313555899, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2524.0, "completions/max_terminated_length": 2524.0, "completions/mean_length": 493.4140625, "completions/mean_terminated_length": 497.2992248535156, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.0288, "grad_norm": 0.009581191465258598, "learning_rate": 4.805555555555556e-06, "loss": 0.0041, "num_tokens": 7196894.0, "reward": 1.3459198474884033, "reward_std": 0.36943548917770386, "rewards/accuracy_reward_step": 0.5234375, "rewards/brier_reward_group": 0.7029818296432495, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6260105967521667, "step": 27 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.39229926007125243, "calib/avg_num_step_conf": 7.91015625, "calib/ece": 0.27197628458498024, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6719367588932806, "calib/gap": -0.016911482597971927, "calib/mean_conf": 0.9152173913043479, "calib/mu_c": 0.9092682926829269, "calib/mu_w": 0.9261797752808988, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26948616600790515, "calib/std_conf": 0.05624315657899554, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2551.0, "completions/max_terminated_length": 2551.0, "completions/mean_length": 513.23046875, "completions/mean_terminated_length": 519.3162231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.029866666666666666, "grad_norm": 0.012991939671337605, "learning_rate": 4.777777777777778e-06, "loss": 0.0344, "num_tokens": 7435225.0, "reward": 1.514519214630127, "reward_std": 0.35605090856552124, "rewards/accuracy_reward_step": 0.640625, "rewards/brier_reward_group": 0.8041585087776184, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.722668468952179, "step": 28 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4374403967194354, "calib/avg_num_step_conf": 8.0, "calib/ece": 0.3319291338582678, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6141732283464567, "calib/gap": -0.008855617013160288, "calib/mean_conf": 0.9106692913385827, "calib/mu_c": 0.9069387755102042, "calib/mu_w": 0.9157943925233645, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3319291338582678, "calib/std_conf": 0.0448350529202759, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2433.0, "completions/max_terminated_length": 2433.0, "completions/mean_length": 561.73046875, "completions/mean_terminated_length": 566.153564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.030933333333333334, "grad_norm": 0.010171281173825264, "learning_rate": 4.75e-06, "loss": 0.0169, "num_tokens": 7686156.0, "reward": 1.4394994974136353, "reward_std": 0.36331403255462646, "rewards/accuracy_reward_step": 0.57421875, "rewards/brier_reward_group": 0.7675879001617432, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7091603875160217, "step": 29 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5130804228556136, "calib/avg_num_step_conf": 7.453125, "calib/ece": 0.2665612648221343, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.41106719367588934, "calib/gap": -0.0003050983540746621, "calib/mean_conf": 0.8891699604743083, "calib/mu_c": 0.889056603773585, "calib/mu_w": 0.8893617021276596, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26363636363636356, "calib/std_conf": 0.05355082147853668, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2295.0, "completions/max_terminated_length": 2295.0, "completions/mean_length": 539.1796875, "completions/mean_terminated_length": 543.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.032, "grad_norm": 0.018558280542492867, "learning_rate": 4.722222222222222e-06, "loss": 0.0198, "num_tokens": 7931170.0, "reward": 1.4883720874786377, "reward_std": 0.3338737487792969, "rewards/accuracy_reward_step": 0.62109375, "rewards/brier_reward_group": 0.7994797229766846, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7008838653564453, "step": 30 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4583174603174603, "calib/avg_num_step_conf": 7.23828125, "calib/ece": 0.3742629482071713, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.27091633466135456, "calib/gap": -0.002998095238095466, "calib/mean_conf": 0.8762549800796813, "calib/mu_c": 0.8747619047619046, "calib/mu_w": 0.8777600000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3742629482071713, "calib/std_conf": 0.05041973943408669, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2521.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 577.0390625, "completions/mean_terminated_length": 579.302001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.03306666666666667, "grad_norm": 0.008476846851408482, "learning_rate": 4.694444444444445e-06, "loss": 0.0122, "num_tokens": 8184804.0, "reward": 1.3323320150375366, "reward_std": 0.32351502776145935, "rewards/accuracy_reward_step": 0.4921875, "rewards/brier_reward_group": 0.7001621723175049, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.699478805065155, "step": 31 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5121424030514939, "calib/avg_num_step_conf": 6.30859375, "calib/ece": 0.27343873517786554, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.09486166007905138, "calib/gap": 0.002286713286713371, "calib/mean_conf": 0.8386561264822134, "calib/mu_c": 0.8396503496503496, "calib/mu_w": 0.8373636363636362, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27343873517786554, "calib/std_conf": 0.06560581420669309, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2242.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 507.671875, "completions/mean_terminated_length": 511.6692810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.034133333333333335, "grad_norm": 0.01096720714122057, "learning_rate": 4.666666666666667e-06, "loss": -0.0019, "num_tokens": 8421472.0, "reward": 1.4251006841659546, "reward_std": 0.2962689697742462, "rewards/accuracy_reward_step": 0.55859375, "rewards/brier_reward_group": 0.7659081816673279, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7391819953918457, "step": 32 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5132413882413882, "calib/avg_num_step_conf": 6.27734375, "calib/ece": 0.2715537848605577, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.07171314741035857, "calib/gap": 0.0026249676249674447, "calib/mean_conf": 0.8301992031872509, "calib/mu_c": 0.8313286713286712, "calib/mu_w": 0.8287037037037037, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26601593625498, "calib/std_conf": 0.0617568177709778, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 534.4921875, "completions/mean_terminated_length": 540.830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.0352, "grad_norm": 0.009881477802991867, "learning_rate": 4.638888888888889e-06, "loss": 0.0407, "num_tokens": 8665174.0, "reward": 1.428787112236023, "reward_std": 0.2549947500228882, "rewards/accuracy_reward_step": 0.55859375, "rewards/brier_reward_group": 0.7520343661308289, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7678015232086182, "step": 33 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4415031592949784, "calib/avg_num_step_conf": 5.93359375, "calib/ece": 0.1896428571428571, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": -0.01558563352178255, "calib/mean_conf": 0.7915476190476189, "calib/mu_c": 0.7855483870967741, "calib/mu_w": 0.8011340206185567, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18305555555555553, "calib/std_conf": 0.06934776717235662, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 496.3671875, "completions/mean_terminated_length": 500.27557373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.03626666666666667, "grad_norm": 0.013796166516840458, "learning_rate": 4.611111111111112e-06, "loss": 0.0134, "num_tokens": 8897356.0, "reward": 1.513746976852417, "reward_std": 0.4082501530647278, "rewards/accuracy_reward_step": 0.60546875, "rewards/brier_reward_group": 0.8546947240829468, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8096679449081421, "step": 34 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5840664711632454, "calib/avg_num_step_conf": 5.4765625, "calib/ece": 0.14336032388663963, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.024291497975708502, "calib/gap": 0.024426057813154656, "calib/mean_conf": 0.7668421052631579, "calib/mu_c": 0.776038961038961, "calib/mu_w": 0.7516129032258063, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.14336032388663963, "calib/std_conf": 0.0838916025605856, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2466.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 592.1328125, "completions/mean_terminated_length": 599.1541748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 0.037333333333333336, "grad_norm": 0.007923703640699387, "learning_rate": 4.583333333333333e-06, "loss": 0.0172, "num_tokens": 9158198.0, "reward": 1.4799138307571411, "reward_std": 0.29859060049057007, "rewards/accuracy_reward_step": 0.6015625, "rewards/brier_reward_group": 0.8021693229675293, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7815483808517456, "step": 35 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5521396396396396, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.05578740157480315, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.01752852852852871, "calib/mean_conf": 0.7644488188976378, "calib/mu_c": 0.7695555555555558, "calib/mu_w": 0.7520270270270271, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05578740157480315, "calib/std_conf": 0.08590126255215237, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2492.0, "completions/max_terminated_length": 2492.0, "completions/mean_length": 502.5078125, "completions/mean_terminated_length": 506.4645690917969, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.0384, "grad_norm": 0.01005138736218214, "learning_rate": 4.555555555555556e-06, "loss": 0.0007, "num_tokens": 9389552.0, "reward": 1.626293659210205, "reward_std": 0.27581554651260376, "rewards/accuracy_reward_step": 0.703125, "rewards/brier_reward_group": 0.8694294691085815, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8466823697090149, "step": 36 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5134461498097862, "calib/avg_num_step_conf": 5.0390625, "calib/ece": 0.2932388663967612, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.06477732793522267, "calib/gap": 0.0018890200708382432, "calib/mean_conf": 0.8033603238866396, "calib/mu_c": 0.8042857142857143, "calib/mu_w": 0.802396694214876, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2932388663967612, "calib/std_conf": 0.07476578628139789, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2983.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 600.9609375, "completions/mean_terminated_length": 605.6929321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.039466666666666664, "grad_norm": 0.008782311342656612, "learning_rate": 4.527777777777778e-06, "loss": 0.0043, "num_tokens": 9650494.0, "reward": 1.3417272567749023, "reward_std": 0.33047229051589966, "rewards/accuracy_reward_step": 0.4921875, "rewards/brier_reward_group": 0.7289199233055115, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7551764845848083, "step": 37 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5028111603064165, "calib/avg_num_step_conf": 4.9765625, "calib/ece": 0.20300813008130075, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.07317073170731707, "calib/gap": 0.009356947079907085, "calib/mean_conf": 0.8249593495934959, "calib/mu_c": 0.8284967320261437, "calib/mu_w": 0.8191397849462366, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.20300813008130075, "calib/std_conf": 0.07112766472342436, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2529.0, "completions/max_terminated_length": 2529.0, "completions/mean_length": 588.40234375, "completions/mean_terminated_length": 595.3794555664062, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.04053333333333333, "grad_norm": 0.01418561115860939, "learning_rate": 4.5e-06, "loss": 0.0212, "num_tokens": 9908013.0, "reward": 1.4612244367599487, "reward_std": 0.3273961544036865, "rewards/accuracy_reward_step": 0.59765625, "rewards/brier_reward_group": 0.7723513841629028, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7678588032722473, "step": 38 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5400226229290039, "calib/avg_num_step_conf": 5.515625, "calib/ece": 0.32918699186991873, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.2601626016260163, "calib/gap": 0.011975514006254517, "calib/mean_conf": 0.8698373983739838, "calib/mu_c": 0.8753383458646616, "calib/mu_w": 0.8633628318584071, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.32918699186991873, "calib/std_conf": 0.06699695102681971, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 648.86328125, "completions/mean_terminated_length": 648.86328125, "completions/min_length": 232.0, "completions/min_terminated_length": 232.0, "epoch": 0.0416, "grad_norm": 0.014082246460020542, "learning_rate": 4.472222222222223e-06, "loss": 0.0524, "num_tokens": 10180210.0, "reward": 1.362987756729126, "reward_std": 0.37360742688179016, "rewards/accuracy_reward_step": 0.51953125, "rewards/brier_reward_group": 0.7202534675598145, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7473224401473999, "step": 39 }, { "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.4992827868852459, "calib/avg_num_step_conf": 6.06640625, "calib/ece": 0.40673553719008254, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.5206611570247934, "calib/gap": 0.002109289617486243, "calib/mean_conf": 0.902603305785124, "calib/mu_c": 0.9036666666666666, "calib/mu_w": 0.9015573770491804, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40673553719008254, "calib/std_conf": 0.04406324893181568, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2366.0, "completions/max_terminated_length": 2366.0, "completions/mean_length": 693.1015625, "completions/mean_terminated_length": 704.1032104492188, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.042666666666666665, "grad_norm": 0.012825295329093933, "learning_rate": 4.444444444444444e-06, "loss": 0.0374, "num_tokens": 10464404.0, "reward": 1.2686021327972412, "reward_std": 0.407518208026886, "rewards/accuracy_reward_step": 0.46875, "rewards/brier_reward_group": 0.645278811454773, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.6713171005249023, "step": 40 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.61613358894158, "calib/avg_num_step_conf": 6.484375, "calib/ece": 0.14174999999999993, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.03217233377055784, "calib/mean_conf": 0.9209166666666666, "calib/mu_c": 0.9280213903743315, "calib/mu_w": 0.8958490566037737, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.14174999999999993, "calib/std_conf": 0.06836782666007617, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 687.8203125, "completions/mean_terminated_length": 690.5177001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 246.0, "epoch": 0.04373333333333333, "grad_norm": 0.009431262500584126, "learning_rate": 4.416666666666667e-06, "loss": 0.0901, "num_tokens": 10747734.0, "reward": 1.6060315370559692, "reward_std": 0.40255552530288696, "rewards/accuracy_reward_step": 0.73046875, "rewards/brier_reward_group": 0.8279871344566345, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.7992638349533081, "step": 41 }, { "calib/answer_extract_rate": 0.88671875, "calib/auroc": 0.5217840608465608, "calib/avg_num_step_conf": 9.875, "calib/ece": 0.3039473684210526, "calib/final_conf_rate": 0.890625, "calib/format_rate": 0.8828125, "calib/frac_conf_gt_0.9": 0.8552631578947368, "calib/gap": 0.0047916666666665275, "calib/mean_conf": 0.9355263157894738, "calib/mu_c": 0.9372916666666666, "calib/mu_w": 0.9325000000000001, "calib/nonempty_final_conf_rate": 0.890625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3039473684210526, "calib/std_conf": 0.03183267948065038, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2735.0, "completions/max_terminated_length": 2735.0, "completions/mean_length": 763.11328125, "completions/mean_terminated_length": 772.162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 299.0, "epoch": 0.0448, "grad_norm": 0.011268241330981255, "learning_rate": 4.388888888888889e-06, "loss": 0.0707, "num_tokens": 11047459.0, "reward": 1.3278944492340088, "reward_std": 0.4228435754776001, "rewards/accuracy_reward_step": 0.5625, "rewards/brier_reward_group": 0.6321179866790771, "rewards/format_reward_step": 0.8828125, "rewards/stepwise_brier_reward": 0.663834810256958, "step": 42 }, { "calib/answer_extract_rate": 0.90625, "calib/auroc": 0.5982491925888153, "calib/avg_num_step_conf": 8.68359375, "calib/ece": 0.25412017167381973, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.8369098712446352, "calib/gap": 0.020009348971612995, "calib/mean_conf": 0.933519313304721, "calib/mu_c": 0.9398742138364782, "calib/mu_w": 0.9198648648648652, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2526180257510729, "calib/std_conf": 0.059674156816859406, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 727.28515625, "completions/mean_terminated_length": 733.0117797851562, "completions/min_length": 0.0, "completions/min_terminated_length": 251.0, "epoch": 0.04586666666666667, "grad_norm": 0.009542334824800491, "learning_rate": 4.361111111111112e-06, "loss": 0.0603, "num_tokens": 11338868.0, "reward": 1.4376327991485596, "reward_std": 0.5053052306175232, "rewards/accuracy_reward_step": 0.62109375, "rewards/brier_reward_group": 0.7418070435523987, "rewards/format_reward_step": 0.90625, "rewards/stepwise_brier_reward": 0.71184903383255, "step": 43 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4944179714091219, "calib/avg_num_step_conf": 5.859375, "calib/ece": 0.40909465020576125, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.9300411522633745, "calib/gap": -0.0006555479918309848, "calib/mean_conf": 0.944074074074074, "calib/mu_c": 0.9437692307692308, "calib/mu_w": 0.9444247787610618, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.40909465020576125, "calib/std_conf": 0.02147509483603587, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 637.25, "completions/mean_terminated_length": 649.9442138671875, "completions/min_length": 0.0, "completions/min_terminated_length": 284.0, "epoch": 0.046933333333333334, "grad_norm": 0.01006728783249855, "learning_rate": 4.333333333333334e-06, "loss": 0.0016, "num_tokens": 11608324.0, "reward": 1.2974810600280762, "reward_std": 0.34184491634368896, "rewards/accuracy_reward_step": 0.5078125, "rewards/brier_reward_group": 0.6303683519363403, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.6533058881759644, "step": 44 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5755058791359038, "calib/avg_num_step_conf": 5.21484375, "calib/ece": 0.30577689243027883, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.8685258964143426, "calib/gap": 0.014649986327590736, "calib/mean_conf": 0.9364541832669323, "calib/mu_c": 0.9418238993710691, "calib/mu_w": 0.9271739130434784, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30438247011952185, "calib/std_conf": 0.04392787565308283, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2670.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 561.43359375, "completions/mean_terminated_length": 565.8543090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.048, "grad_norm": 0.009378692135214806, "learning_rate": 4.305555555555556e-06, "loss": 0.0162, "num_tokens": 11857099.0, "reward": 1.4878321886062622, "reward_std": 0.35901641845703125, "rewards/accuracy_reward_step": 0.62109375, "rewards/brier_reward_group": 0.7700293064117432, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.75161212682724, "step": 45 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.52506435006435, "calib/avg_num_step_conf": 4.6328125, "calib/ece": 0.3822310756972112, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9083665338645418, "calib/gap": 0.0014536679536677477, "calib/mean_conf": 0.94, "calib/mu_c": 0.9406428571428571, "calib/mu_w": 0.9391891891891894, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3822310756972112, "calib/std_conf": 0.022190770128241635, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2241.0, "completions/max_terminated_length": 2241.0, "completions/mean_length": 561.3125, "completions/mean_terminated_length": 565.7322998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.04906666666666667, "grad_norm": 0.010353241115808487, "learning_rate": 4.277777777777778e-06, "loss": -0.0025, "num_tokens": 12105563.0, "reward": 1.3909752368927002, "reward_std": 0.3092753291130066, "rewards/accuracy_reward_step": 0.546875, "rewards/brier_reward_group": 0.6861876845359802, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7292758822441101, "step": 46 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6003111471861471, "calib/avg_num_step_conf": 4.73046875, "calib/ece": 0.3187999999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.916, "calib/gap": 0.01726190476190481, "calib/mean_conf": 0.9348, "calib/mu_c": 0.9414285714285715, "calib/mu_w": 0.9241666666666667, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3187999999999999, "calib/std_conf": 0.0615805163992638, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2187.0, "completions/max_terminated_length": 2187.0, "completions/mean_length": 546.3046875, "completions/mean_terminated_length": 552.7826538085938, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.050133333333333335, "grad_norm": 0.011031719855964184, "learning_rate": 4.25e-06, "loss": 0.0258, "num_tokens": 12351393.0, "reward": 1.4612222909927368, "reward_std": 0.2621772885322571, "rewards/accuracy_reward_step": 0.6015625, "rewards/brier_reward_group": 0.7333366870880127, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7521774768829346, "step": 47 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5540465631929046, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.41752941176470565, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8823529411764706, "calib/gap": 0.004502956393200308, "calib/mean_conf": 0.9351764705882352, "calib/mu_c": 0.9373484848484848, "calib/mu_w": 0.9328455284552845, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.41752941176470565, "calib/std_conf": 0.02623925055896591, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2182.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 487.90625, "completions/mean_terminated_length": 493.69171142578125, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.0512, "grad_norm": 0.029779616743326187, "learning_rate": 4.222222222222223e-06, "loss": 0.0031, "num_tokens": 12579985.0, "reward": 1.3629781007766724, "reward_std": 0.3381595015525818, "rewards/accuracy_reward_step": 0.515625, "rewards/brier_reward_group": 0.69517982006073, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7098574638366699, "step": 48 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.586783891094725, "calib/avg_num_step_conf": 4.38671875, "calib/ece": 0.25755905511811017, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9015748031496063, "calib/gap": 0.01291973908111177, "calib/mean_conf": 0.9347244094488188, "calib/mu_c": 0.9388953488372094, "calib/mu_w": 0.9259756097560976, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25755905511811017, "calib/std_conf": 0.03446615986489285, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2302.0, "completions/max_terminated_length": 2302.0, "completions/mean_length": 480.9375, "completions/mean_terminated_length": 486.6403503417969, "completions/min_length": 0.0, "completions/min_terminated_length": 230.0, "epoch": 0.05226666666666667, "grad_norm": 0.012473726645112038, "learning_rate": 4.194444444444445e-06, "loss": 0.0526, "num_tokens": 12807641.0, "reward": 1.5611481666564941, "reward_std": 0.28796297311782837, "rewards/accuracy_reward_step": 0.671875, "rewards/brier_reward_group": 0.8108144402503967, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7619033455848694, "step": 49 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.593910034602076, "calib/avg_num_step_conf": 4.1640625, "calib/ece": 0.26933333333333315, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9058823529411765, "calib/gap": 0.009529411764705564, "calib/mean_conf": 0.9359999999999999, "calib/mu_c": 0.9391764705882353, "calib/mu_w": 0.9296470588235297, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26933333333333315, "calib/std_conf": 0.02181203121862684, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2540.0, "completions/max_terminated_length": 2540.0, "completions/mean_length": 481.08984375, "completions/mean_terminated_length": 486.79449462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.05333333333333334, "grad_norm": 0.013286111876368523, "learning_rate": 4.166666666666667e-06, "loss": 0.0492, "num_tokens": 13036160.0, "reward": 1.549306869506836, "reward_std": 0.2995968759059906, "rewards/accuracy_reward_step": 0.6640625, "rewards/brier_reward_group": 0.798114538192749, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7506750822067261, "step": 50 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6640414360720874, "calib/avg_num_step_conf": 3.66015625, "calib/ece": 0.2511764705882351, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8588235294117647, "calib/gap": 0.012956577266922231, "calib/mean_conf": 0.9318039215686273, "calib/mu_c": 0.9359195402298852, "calib/mu_w": 0.922962962962963, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2503137254901959, "calib/std_conf": 0.024636191779688926, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 455.47265625, "completions/mean_terminated_length": 460.87353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.0544, "grad_norm": 0.0234882440418005, "learning_rate": 4.138888888888889e-06, "loss": -0.0162, "num_tokens": 13262057.0, "reward": 1.5801244974136353, "reward_std": 0.3207731246948242, "rewards/accuracy_reward_step": 0.6796875, "rewards/brier_reward_group": 0.8323727250099182, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7771878242492676, "step": 51 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5761648745519713, "calib/avg_num_step_conf": 3.64453125, "calib/ece": 0.20206349206349206, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9444444444444444, "calib/gap": 0.0036852394916911946, "calib/mean_conf": 0.938174603174603, "calib/mu_c": 0.9391397849462366, "calib/mu_w": 0.9354545454545454, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20107142857142857, "calib/std_conf": 0.024845580030756997, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2603.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 425.95703125, "completions/mean_terminated_length": 431.0079345703125, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.055466666666666664, "grad_norm": 0.020434802398085594, "learning_rate": 4.111111111111111e-06, "loss": 0.0314, "num_tokens": 13479054.0, "reward": 1.6307379007339478, "reward_std": 0.25411444902420044, "rewards/accuracy_reward_step": 0.7265625, "rewards/brier_reward_group": 0.8398484587669373, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8081033229827881, "step": 52 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6889456506314745, "calib/avg_num_step_conf": 2.90234375, "calib/ece": 0.26243137254901955, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9098039215686274, "calib/gap": 0.017324393358876145, "calib/mean_conf": 0.9447843137254901, "calib/mu_c": 0.950287356321839, "calib/mu_w": 0.9329629629629629, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26243137254901955, "calib/std_conf": 0.02623895750969716, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 413.1640625, "completions/mean_terminated_length": 418.0632629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.05653333333333333, "grad_norm": 0.022565390914678574, "learning_rate": 4.083333333333334e-06, "loss": -0.0037, "num_tokens": 13690648.0, "reward": 1.5788846015930176, "reward_std": 0.28395211696624756, "rewards/accuracy_reward_step": 0.6796875, "rewards/brier_reward_group": 0.8142070770263672, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7903937101364136, "step": 53 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6343967730106344, "calib/avg_num_step_conf": 2.796875, "calib/ece": 0.16806640624999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.921875, "calib/gap": 0.010365786578657832, "calib/mean_conf": 0.95095703125, "calib/mu_c": 0.9531435643564357, "calib/mu_w": 0.9427777777777778, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16498046874999997, "calib/std_conf": 0.03614697252103588, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1085.0, "completions/max_terminated_length": 1085.0, "completions/mean_length": 344.88671875, "completions/mean_terminated_length": 350.36114501953125, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.0576, "grad_norm": 0.01560815516859293, "learning_rate": 4.055555555555556e-06, "loss": -0.0029, "num_tokens": 13885171.0, "reward": 1.7181185483932495, "reward_std": 0.2507132887840271, "rewards/accuracy_reward_step": 0.7890625, "rewards/brier_reward_group": 0.8910635709762573, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.825160801410675, "step": 54 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6021959459459459, "calib/avg_num_step_conf": 2.7265625, "calib/ece": 0.39780000000000004, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9803921568627451, "calib/gap": 0.02071002252252252, "calib/mean_conf": 0.9625058823529412, "calib/mu_c": 0.9715208333333334, "calib/mu_w": 0.9508108108108109, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39780000000000004, "calib/std_conf": 0.06857426891739195, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2252.0, "completions/max_terminated_length": 2252.0, "completions/mean_length": 367.59765625, "completions/mean_terminated_length": 371.95654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.058666666666666666, "grad_norm": 0.012119713239371777, "learning_rate": 4.027777777777779e-06, "loss": 0.0246, "num_tokens": 14087100.0, "reward": 1.4041357040405273, "reward_std": 0.3639181852340698, "rewards/accuracy_reward_step": 0.5625, "rewards/brier_reward_group": 0.7138887643814087, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6604669690132141, "step": 55 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5773824731732242, "calib/avg_num_step_conf": 2.328125, "calib/ece": 0.37866666666666676, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9725490196078431, "calib/gap": 0.008937787429739608, "calib/mean_conf": 0.9747450980392158, "calib/mu_c": 0.978355263157895, "calib/mu_w": 0.9694174757281554, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37866666666666676, "calib/std_conf": 0.041537057934489446, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 810.0, "completions/max_terminated_length": 810.0, "completions/mean_length": 381.9140625, "completions/mean_terminated_length": 387.9762268066406, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.05973333333333333, "grad_norm": 0.01059749536216259, "learning_rate": 4.000000000000001e-06, "loss": 0.0108, "num_tokens": 14291710.0, "reward": 1.446723222732544, "reward_std": 0.29063791036605835, "rewards/accuracy_reward_step": 0.59375, "rewards/brier_reward_group": 0.7016832828521729, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7258347272872925, "step": 56 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5818525654749162, "calib/avg_num_step_conf": 2.1015625, "calib/ece": 0.30692913385826764, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9921259842519685, "calib/gap": 0.004722757439520442, "calib/mean_conf": 0.9880314960629922, "calib/mu_c": 0.9895375722543353, "calib/mu_w": 0.9848148148148148, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.30692913385826764, "calib/std_conf": 0.0161459568122748, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2178.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 359.89453125, "completions/mean_terminated_length": 364.1620788574219, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.0608, "grad_norm": 0.015423390083014965, "learning_rate": 3.972222222222223e-06, "loss": -0.0222, "num_tokens": 14490635.0, "reward": 1.5489262342453003, "reward_std": 0.3717479705810547, "rewards/accuracy_reward_step": 0.67578125, "rewards/brier_reward_group": 0.7912935018539429, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7403486967086792, "step": 57 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49721875, "calib/avg_num_step_conf": 1.921875, "calib/ece": 0.4861260869565219, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9960474308300395, "calib/gap": 0.0032710937500001203, "calib/mean_conf": 0.9920549407114626, "calib/mu_c": 0.9936710937500001, "calib/mu_w": 0.9904, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4861260869565219, "calib/std_conf": 0.03717970707095284, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2182.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 371.55859375, "completions/mean_terminated_length": 375.9644470214844, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.06186666666666667, "grad_norm": 0.014439908787608147, "learning_rate": 3.944444444444445e-06, "loss": -0.0435, "num_tokens": 14692074.0, "reward": 1.2871463298797607, "reward_std": 0.3386479616165161, "rewards/accuracy_reward_step": 0.5, "rewards/brier_reward_group": 0.6030274629592896, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.5768078565597534, "step": 58 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5329601990049752, "calib/avg_num_step_conf": 1.83984375, "calib/ece": 0.4704244094488189, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.000775833333333309, "calib/mean_conf": 0.9979834645669291, "calib/mu_c": 0.99835, "calib/mu_w": 0.9975741666666667, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4704244094488189, "calib/std_conf": 0.004194649080693384, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2227.0, "completions/max_terminated_length": 2227.0, "completions/mean_length": 351.65625, "completions/mean_terminated_length": 354.4252014160156, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.06293333333333333, "grad_norm": 0.011451630853116512, "learning_rate": 3.916666666666667e-06, "loss": 0.0039, "num_tokens": 14888346.0, "reward": 1.3189291954040527, "reward_std": 0.3809748589992523, "rewards/accuracy_reward_step": 0.5234375, "rewards/brier_reward_group": 0.6371943950653076, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.5838346481323242, "step": 59 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.526216870540265, "calib/avg_num_step_conf": 1.58203125, "calib/ece": 0.4265624505928854, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9841897233201581, "calib/gap": 0.0077425395005098485, "calib/mean_conf": 0.9829260869565217, "calib/mu_c": 0.9862618055555556, "calib/mu_w": 0.9785192660550458, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.4201592885375494, "calib/std_conf": 0.11431142691534942, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 328.60546875, "completions/mean_terminated_length": 332.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.064, "grad_norm": 0.013444377109408379, "learning_rate": 3.88888888888889e-06, "loss": -0.0305, "num_tokens": 15081325.0, "reward": 1.3731634616851807, "reward_std": 0.30295276641845703, "rewards/accuracy_reward_step": 0.5625, "rewards/brier_reward_group": 0.6537109613418579, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6358177065849304, "step": 60 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4524908424908425, "calib/avg_num_step_conf": 1.61328125, "calib/ece": 0.30682055335968383, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0009995384615386627, "calib/mean_conf": 0.9985201581027668, "calib/mu_c": 0.9982119999999999, "calib/mu_w": 0.9992115384615385, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.30682055335968383, "calib/std_conf": 0.003528028479936166, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2243.0, "completions/max_terminated_length": 2243.0, "completions/mean_length": 277.09765625, "completions/mean_terminated_length": 280.3834228515625, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.06506666666666666, "grad_norm": 0.014007938094437122, "learning_rate": 3.861111111111112e-06, "loss": 0.0207, "num_tokens": 15256326.0, "reward": 1.5428268909454346, "reward_std": 0.276619553565979, "rewards/accuracy_reward_step": 0.68359375, "rewards/brier_reward_group": 0.7586652040481567, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7095175981521606, "step": 61 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5042087542087542, "calib/avg_num_step_conf": 1.22265625, "calib/ece": 0.38059921568627464, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9882352941176471, "calib/gap": 0.010356332556332615, "calib/mean_conf": 0.9923639215686274, "calib/mu_c": 0.9963846153846154, "calib/mu_w": 0.9860282828282828, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.38059921568627464, "calib/std_conf": 0.0388025341843945, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1334.0, "completions/max_terminated_length": 1334.0, "completions/mean_length": 288.89453125, "completions/mean_terminated_length": 293.48016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.06613333333333334, "grad_norm": 0.012418070808053017, "learning_rate": 3.833333333333334e-06, "loss": -0.0151, "num_tokens": 15437363.0, "reward": 1.4541146755218506, "reward_std": 0.4250434637069702, "rewards/accuracy_reward_step": 0.609375, "rewards/brier_reward_group": 0.7418953776359558, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6683136224746704, "step": 62 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4864263462394304, "calib/avg_num_step_conf": 1.31640625, "calib/ece": 0.4183389763779528, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0005011316676203359, "calib/mean_conf": 0.9970791338582677, "calib/mu_c": 0.9968680272108844, "calib/mu_w": 0.9973691588785047, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4183389763779528, "calib/std_conf": 0.004625374813264798, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2210.0, "completions/max_terminated_length": 2210.0, "completions/mean_length": 320.89453125, "completions/mean_terminated_length": 324.6996154785156, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.0672, "grad_norm": 0.015316436998546124, "learning_rate": 3.8055555555555556e-06, "loss": -0.013, "num_tokens": 15628152.0, "reward": 1.3845548629760742, "reward_std": 0.3264836072921753, "rewards/accuracy_reward_step": 0.57421875, "rewards/brier_reward_group": 0.6657936573028564, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6146135330200195, "step": 63 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5652515186676677, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.3528916666666667, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9920634920634921, "calib/gap": 0.006846201624462478, "calib/mean_conf": 0.9839234126984127, "calib/mu_c": 0.9863956521739131, "calib/mu_w": 0.9795494505494506, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.34896309523809527, "calib/std_conf": 0.08773991941973767, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2232.0, "completions/max_terminated_length": 2232.0, "completions/mean_length": 264.7734375, "completions/mean_terminated_length": 268.9761962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.06826666666666667, "grad_norm": 0.01596958003938198, "learning_rate": 3.777777777777778e-06, "loss": -0.0237, "num_tokens": 15799710.0, "reward": 1.4842865467071533, "reward_std": 0.4227965772151947, "rewards/accuracy_reward_step": 0.62890625, "rewards/brier_reward_group": 0.766762375831604, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6938216686248779, "step": 64 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5055081507896078, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.39497215686274506, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.009473484462557025, "calib/mean_conf": 0.987129019607843, "calib/mu_c": 0.9909927152317878, "calib/mu_w": 0.9815192307692308, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.39497215686274506, "calib/std_conf": 0.060790807008016354, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2194.0, "completions/max_terminated_length": 2194.0, "completions/mean_length": 240.83203125, "completions/mean_terminated_length": 243.68775939941406, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.06933333333333333, "grad_norm": 0.016611957922577858, "learning_rate": 3.7500000000000005e-06, "loss": -0.023, "num_tokens": 15966387.0, "reward": 1.412438988685608, "reward_std": 0.22559529542922974, "rewards/accuracy_reward_step": 0.58984375, "rewards/brier_reward_group": 0.6649578809738159, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6488604545593262, "step": 65 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5029582152101566, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.5169803921568626, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9882352941176471, "calib/gap": -0.011903734746702588, "calib/mean_conf": 0.9813725490196078, "calib/mu_c": 0.9751639344262295, "calib/mu_w": 0.9870676691729321, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.5099607843137254, "calib/std_conf": 0.08009356312642617, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2223.0, "completions/max_terminated_length": 2223.0, "completions/mean_length": 282.66015625, "completions/mean_terminated_length": 286.0118713378906, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.0704, "grad_norm": 0.01374450046569109, "learning_rate": 3.7222222222222225e-06, "loss": 0.0006, "num_tokens": 16145100.0, "reward": 1.2697900533676147, "reward_std": 0.32513415813446045, "rewards/accuracy_reward_step": 0.4765625, "rewards/brier_reward_group": 0.5864582061767578, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6020768880844116, "step": 66 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5362654320987654, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.4155555555555557, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.996031746031746, "calib/gap": 0.007199074074074163, "calib/mean_conf": 0.986984126984127, "calib/mu_c": 0.9900694444444444, "calib/mu_w": 0.9828703703703703, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.4155555555555557, "calib/std_conf": 0.03877631909620279, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2185.0, "completions/max_terminated_length": 2185.0, "completions/mean_length": 253.66015625, "completions/mean_terminated_length": 257.6865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.07146666666666666, "grad_norm": 0.02686356194317341, "learning_rate": 3.694444444444445e-06, "loss": -0.0081, "num_tokens": 16315045.0, "reward": 1.3880245685577393, "reward_std": 0.3380063772201538, "rewards/accuracy_reward_step": 0.5625, "rewards/brier_reward_group": 0.6746812462806702, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6586668491363525, "step": 67 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5094562647754137, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.4336078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9882352941176471, "calib/gap": 0.0020082120194101494, "calib/mean_conf": 0.986549019607843, "calib/mu_c": 0.9874468085106383, "calib/mu_w": 0.9854385964912281, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4336078431372549, "calib/std_conf": 0.025264338755730805, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 736.0, "completions/max_terminated_length": 736.0, "completions/mean_length": 245.125, "completions/mean_terminated_length": 249.01588439941406, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.07253333333333334, "grad_norm": 0.018724871799349785, "learning_rate": 3.6666666666666666e-06, "loss": -0.0088, "num_tokens": 16481885.0, "reward": 1.3662382364273071, "reward_std": 0.2699292302131653, "rewards/accuracy_reward_step": 0.55078125, "rewards/brier_reward_group": 0.6460593938827515, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6392064094543457, "step": 68 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5151656993072458, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.4046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.98046875, "calib/gap": 0.0201435436559948, "calib/mean_conf": 0.9789062500000001, "calib/mu_c": 0.9874829931972791, "calib/mu_w": 0.9673394495412843, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4046875, "calib/std_conf": 0.0800169354632974, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 244.9375, "completions/mean_terminated_length": 248.82540893554688, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.0736, "grad_norm": 0.02710888721048832, "learning_rate": 3.638888888888889e-06, "loss": -0.0106, "num_tokens": 16649085.0, "reward": 1.4051319360733032, "reward_std": 0.2940649092197418, "rewards/accuracy_reward_step": 0.57421875, "rewards/brier_reward_group": 0.6779281497001648, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6535372138023376, "step": 69 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5332393483709273, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.4245275590551183, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9724409448818898, "calib/gap": 0.029003759398496087, "calib/mean_conf": 0.9701968503937008, "calib/mu_c": 0.9832142857142856, "calib/mu_w": 0.9542105263157895, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4217716535433073, "calib/std_conf": 0.11772265146200765, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2166.0, "completions/max_terminated_length": 2166.0, "completions/mean_length": 289.89453125, "completions/mean_terminated_length": 294.4960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.07466666666666667, "grad_norm": 0.014959375374019146, "learning_rate": 3.6111111111111115e-06, "loss": -0.0152, "num_tokens": 16830290.0, "reward": 1.3668923377990723, "reward_std": 0.24334564805030823, "rewards/accuracy_reward_step": 0.546875, "rewards/brier_reward_group": 0.6424806118011475, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6610262393951416, "step": 70 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5500970066518847, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.3348412698412699, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9523809523809523, "calib/gap": 0.014972283813747267, "calib/mean_conf": 0.9626984126984128, "calib/mu_c": 0.9679268292682928, "calib/mu_w": 0.9529545454545455, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.32337301587301587, "calib/std_conf": 0.1297397437510634, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2236.0, "completions/max_terminated_length": 2236.0, "completions/mean_length": 284.44140625, "completions/mean_terminated_length": 287.8142395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.07573333333333333, "grad_norm": 0.016708169132471085, "learning_rate": 3.5833333333333335e-06, "loss": 0.0293, "num_tokens": 17007515.0, "reward": 1.4961044788360596, "reward_std": 0.44954103231430054, "rewards/accuracy_reward_step": 0.640625, "rewards/brier_reward_group": 0.773381233215332, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6954114437103271, "step": 71 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.57889874353289, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.45803921568627454, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9764705882352941, "calib/gap": 0.021350702143385192, "calib/mean_conf": 0.9756862745098039, "calib/mu_c": 0.9859848484848486, "calib/mu_w": 0.9646341463414634, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.45803921568627454, "calib/std_conf": 0.07645721351112288, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 961.0, "completions/max_terminated_length": 961.0, "completions/mean_length": 246.76171875, "completions/mean_terminated_length": 250.6785888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.0768, "grad_norm": 0.017632341012358665, "learning_rate": 3.555555555555556e-06, "loss": 0.0086, "num_tokens": 17175094.0, "reward": 1.341632604598999, "reward_std": 0.2776060104370117, "rewards/accuracy_reward_step": 0.515625, "rewards/brier_reward_group": 0.6244671940803528, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6873756647109985, "step": 72 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5589257840215696, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.3068235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9647058823529412, "calib/gap": 0.011943380161770878, "calib/mean_conf": 0.9658039215686275, "calib/mu_c": 0.9695977011494253, "calib/mu_w": 0.9576543209876545, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2951372549019608, "calib/std_conf": 0.128643148253082, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 736.0, "completions/max_terminated_length": 736.0, "completions/mean_length": 253.40625, "completions/mean_terminated_length": 257.4285888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.07786666666666667, "grad_norm": 0.017207786440849304, "learning_rate": 3.5277777777777784e-06, "loss": -0.0192, "num_tokens": 17346998.0, "reward": 1.5725383758544922, "reward_std": 0.37019598484039307, "rewards/accuracy_reward_step": 0.68359375, "rewards/brier_reward_group": 0.8157491683959961, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.747841477394104, "step": 73 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5547945205479452, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.4038425196850395, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9488188976377953, "calib/gap": 0.018434043632673647, "calib/mean_conf": 0.9712440944881889, "calib/mu_c": 0.9790821917808217, "calib/mu_w": 0.960648148148148, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.40014173228346467, "calib/std_conf": 0.09582823516115335, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2227.0, "completions/max_terminated_length": 2227.0, "completions/mean_length": 255.73828125, "completions/mean_terminated_length": 257.751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.07893333333333333, "grad_norm": 0.014544480480253696, "learning_rate": 3.5e-06, "loss": 0.007, "num_tokens": 17516395.0, "reward": 1.4134066104888916, "reward_std": 0.3412873446941376, "rewards/accuracy_reward_step": 0.5703125, "rewards/brier_reward_group": 0.6993246078491211, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6886764764785767, "step": 74 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5625055100061712, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2105859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9609375, "calib/gap": 0.026311381468747008, "calib/mean_conf": 0.9723828125, "calib/mu_c": 0.9782412060301505, "calib/mu_w": 0.9519298245614035, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2028125, "calib/std_conf": 0.0932328755299859, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 693.0, "completions/max_terminated_length": 693.0, "completions/mean_length": 240.109375, "completions/mean_terminated_length": 243.920654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.08, "grad_norm": 0.020223695784807205, "learning_rate": 3.4722222222222224e-06, "loss": 0.022, "num_tokens": 17682615.0, "reward": 1.7042614221572876, "reward_std": 0.23040184378623962, "rewards/accuracy_reward_step": 0.77734375, "rewards/brier_reward_group": 0.8712968826293945, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8363738059997559, "step": 75 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5862294130450555, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.26847656250000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.9140625, "calib/gap": 0.05851628818109267, "calib/mean_conf": 0.9375390625000002, "calib/mu_c": 0.9551396648044692, "calib/mu_w": 0.8966233766233765, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2533984375000001, "calib/std_conf": 0.1831660535187159, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1197.0, "completions/max_terminated_length": 1197.0, "completions/mean_length": 255.4296875, "completions/mean_terminated_length": 259.484130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.08106666666666666, "grad_norm": 0.012735992670059204, "learning_rate": 3.444444444444445e-06, "loss": -0.0105, "num_tokens": 17851061.0, "reward": 1.58017897605896, "reward_std": 0.1934841275215149, "rewards/accuracy_reward_step": 0.69921875, "rewards/brier_reward_group": 0.7768750190734863, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7469661235809326, "step": 76 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5713551984738425, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2831496062992125, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9291338582677166, "calib/gap": 0.05980923031770535, "calib/mean_conf": 0.9440157480314961, "calib/mu_c": 0.9621468926553673, "calib/mu_w": 0.902337662337662, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.26515748031496056, "calib/std_conf": 0.18035948286432327, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2198.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 286.921875, "completions/mean_terminated_length": 290.3241271972656, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.08213333333333334, "grad_norm": 0.012091055512428284, "learning_rate": 3.416666666666667e-06, "loss": -0.0109, "num_tokens": 18029177.0, "reward": 1.5749398469924927, "reward_std": 0.30894869565963745, "rewards/accuracy_reward_step": 0.69140625, "rewards/brier_reward_group": 0.7807500958442688, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7690092325210571, "step": 77 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5302408854166667, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.35859375000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9765625, "calib/gap": 0.01866666666666661, "calib/mean_conf": 0.9796875, "calib/mu_c": 0.9866875, "calib/mu_w": 0.9680208333333334, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35664062500000004, "calib/std_conf": 0.07192050190140499, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 861.0, "completions/max_terminated_length": 861.0, "completions/mean_length": 323.45703125, "completions/mean_terminated_length": 328.5912780761719, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.0832, "grad_norm": 0.0124210799112916, "learning_rate": 3.3888888888888893e-06, "loss": -0.0002, "num_tokens": 18220006.0, "reward": 1.478103518486023, "reward_std": 0.24688759446144104, "rewards/accuracy_reward_step": 0.625, "rewards/brier_reward_group": 0.7166808247566223, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7035453915596008, "step": 78 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.572663139329806, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.2931953125000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9765625, "calib/gap": 0.031783985890652455, "calib/mean_conf": 0.9767890625000002, "calib/mu_c": 0.9868457142857142, "calib/mu_w": 0.9550617283950618, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2931953125000002, "calib/std_conf": 0.07936300621430045, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 887.0, "completions/max_terminated_length": 887.0, "completions/mean_length": 322.65625, "completions/mean_terminated_length": 327.7778015136719, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.08426666666666667, "grad_norm": 0.0173186007887125, "learning_rate": 3.3611111111111117e-06, "loss": 0.0059, "num_tokens": 18408982.0, "reward": 1.5656778812408447, "reward_std": 0.26752346754074097, "rewards/accuracy_reward_step": 0.68359375, "rewards/brier_reward_group": 0.779755711555481, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7720180749893188, "step": 79 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5068493150684932, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.27507812500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00027397260273953616, "calib/mean_conf": 0.9899218750000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9897260273972602, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.27507812500000006, "calib/std_conf": 0.0012475562048961974, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1052.0, "completions/max_terminated_length": 1052.0, "completions/mean_length": 272.59375, "completions/mean_terminated_length": 276.920654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.08533333333333333, "grad_norm": 0.017474321648478508, "learning_rate": 3.3333333333333333e-06, "loss": -0.02, "num_tokens": 18580926.0, "reward": 1.601799726486206, "reward_std": 0.27312523126602173, "rewards/accuracy_reward_step": 0.71484375, "rewards/brier_reward_group": 0.8032468557357788, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7523896098136902, "step": 80 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5323647871116225, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.2905098039215687, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9803921568627451, "calib/gap": 0.024984177215189973, "calib/mean_conf": 0.9785098039215687, "calib/mu_c": 0.9862500000000001, "calib/mu_w": 0.9612658227848101, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2894117647058824, "calib/std_conf": 0.08085896037243229, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2236.0, "completions/max_terminated_length": 2236.0, "completions/mean_length": 332.3515625, "completions/mean_terminated_length": 336.2925109863281, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.0864, "grad_norm": 0.015995075926184654, "learning_rate": 3.3055555555555558e-06, "loss": -0.0105, "num_tokens": 18772256.0, "reward": 1.5499467849731445, "reward_std": 0.24558739364147186, "rewards/accuracy_reward_step": 0.6875, "rewards/brier_reward_group": 0.7626110315322876, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.718425989151001, "step": 81 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5240963855421686, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.30980468749999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9921875, "calib/gap": 0.01361445783132531, "calib/mean_conf": 0.9855859375000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9763855421686746, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.30980468749999995, "calib/std_conf": 0.05007463606703589, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 281.83203125, "completions/mean_terminated_length": 286.3055725097656, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.08746666666666666, "grad_norm": 0.01421276107430458, "learning_rate": 3.277777777777778e-06, "loss": -0.0187, "num_tokens": 18949957.0, "reward": 1.5429801940917969, "reward_std": 0.23315325379371643, "rewards/accuracy_reward_step": 0.67578125, "rewards/brier_reward_group": 0.7639062404632568, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7127020359039307, "step": 82 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5580784574468086, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.33783464566929144, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9645669291338582, "calib/gap": 0.0621316489361704, "calib/mean_conf": 0.9638188976377953, "calib/mu_c": 0.9868124999999999, "calib/mu_w": 0.9246808510638295, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.3358661417322835, "calib/std_conf": 0.14154929679234315, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 389.89453125, "completions/mean_terminated_length": 394.5177917480469, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.08853333333333334, "grad_norm": 0.008663997985422611, "learning_rate": 3.2500000000000002e-06, "loss": -0.0166, "num_tokens": 19157034.0, "reward": 1.4604687690734863, "reward_std": 0.23011240363121033, "rewards/accuracy_reward_step": 0.625, "rewards/brier_reward_group": 0.7134954929351807, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.6830668449401855, "step": 83 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5376438134966023, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.3225625000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.94921875, "calib/gap": 0.027094664603377283, "calib/mean_conf": 0.9592031250000002, "calib/mu_c": 0.9686227544910178, "calib/mu_w": 0.9415280898876405, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.31471093750000007, "calib/std_conf": 0.13959236745873457, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1104.0, "completions/max_terminated_length": 1104.0, "completions/mean_length": 310.03515625, "completions/mean_terminated_length": 314.95635986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.0896, "grad_norm": 0.014482676982879639, "learning_rate": 3.2222222222222227e-06, "loss": 0.0079, "num_tokens": 19342323.0, "reward": 1.518557071685791, "reward_std": 0.255326509475708, "rewards/accuracy_reward_step": 0.65234375, "rewards/brier_reward_group": 0.7570656538009644, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7234127521514893, "step": 84 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5612244897959183, "calib/avg_num_step_conf": 0.94921875, "calib/ece": 0.3595686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.9529411764705882, "calib/gap": 0.038367346938775526, "calib/mean_conf": 0.9752549019607843, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9516326530612244, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.3595686274509804, "calib/std_conf": 0.0711408002229588, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1543.0, "completions/max_terminated_length": 1543.0, "completions/mean_length": 345.0390625, "completions/mean_terminated_length": 350.5158996582031, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.09066666666666667, "grad_norm": 0.014872740022838116, "learning_rate": 3.1944444444444443e-06, "loss": 0.0112, "num_tokens": 19538477.0, "reward": 1.4445767402648926, "reward_std": 0.3293463885784149, "rewards/accuracy_reward_step": 0.61328125, "rewards/brier_reward_group": 0.7269442081451416, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.7154250741004944, "step": 85 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5202144433932513, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.38691406250000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.97265625, "calib/gap": 0.015009145380006084, "calib/mean_conf": 0.9767578125000002, "calib/mu_c": 0.9829139072847679, "calib/mu_w": 0.9679047619047618, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.38691406250000004, "calib/std_conf": 0.08345584967642977, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1345.0, "completions/max_terminated_length": 1345.0, "completions/mean_length": 381.8046875, "completions/mean_terminated_length": 387.8651123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.09173333333333333, "grad_norm": 0.012381580658257008, "learning_rate": 3.1666666666666667e-06, "loss": -0.0121, "num_tokens": 19741731.0, "reward": 1.417504072189331, "reward_std": 0.18847745656967163, "rewards/accuracy_reward_step": 0.58984375, "rewards/brier_reward_group": 0.6652394533157349, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6532144546508789, "step": 86 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5466451959205583, "calib/avg_num_step_conf": 0.97265625, "calib/ece": 0.1575396825396825, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9444444444444444, "calib/gap": 0.03254106280193214, "calib/mean_conf": 0.9665079365079366, "calib/mu_c": 0.9723188405797101, "calib/mu_w": 0.9397777777777779, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.15130952380952378, "calib/std_conf": 0.10351610150283469, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1513.0, "completions/max_terminated_length": 1513.0, "completions/mean_length": 329.05078125, "completions/mean_terminated_length": 338.3011779785156, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.0928, "grad_norm": 0.012511593289673328, "learning_rate": 3.138888888888889e-06, "loss": -0.0192, "num_tokens": 19931464.0, "reward": 1.7223682403564453, "reward_std": 0.24959589540958405, "rewards/accuracy_reward_step": 0.80859375, "rewards/brier_reward_group": 0.8768448829650879, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.832940399646759, "step": 87 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5352112676056338, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.26070588235294123, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.984313725490196, "calib/gap": 0.027746478873239888, "calib/mean_conf": 0.9822745098039215, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9622535211267602, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.26070588235294123, "calib/std_conf": 0.07082374655255184, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 371.45703125, "completions/mean_terminated_length": 377.35321044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.09386666666666667, "grad_norm": 0.011493110097944736, "learning_rate": 3.1111111111111116e-06, "loss": -0.0056, "num_tokens": 20136405.0, "reward": 1.6156119108200073, "reward_std": 0.20070913434028625, "rewards/accuracy_reward_step": 0.71875, "rewards/brier_reward_group": 0.7902989387512207, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.812773585319519, "step": 88 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5699464961503327, "calib/avg_num_step_conf": 0.9765625, "calib/ece": 0.3442745098039215, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9254901960784314, "calib/gap": 0.055241419809473946, "calib/mean_conf": 0.9591764705882353, "calib/mu_c": 0.9801898734177213, "calib/mu_w": 0.9249484536082474, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.3419215686274509, "calib/std_conf": 0.11982220585097834, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1936.0, "completions/max_terminated_length": 1936.0, "completions/mean_length": 430.6953125, "completions/mean_terminated_length": 437.5317687988281, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.09493333333333333, "grad_norm": 0.021054452285170555, "learning_rate": 3.0833333333333336e-06, "loss": -0.0069, "num_tokens": 20355551.0, "reward": 1.4646778106689453, "reward_std": 0.28685474395751953, "rewards/accuracy_reward_step": 0.6171875, "rewards/brier_reward_group": 0.731169581413269, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7056663036346436, "step": 89 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5331556909021697, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.265234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.953125, "calib/gap": 0.026733155690901844, "calib/mean_conf": 0.9652343750000001, "calib/mu_c": 0.9726486486486486, "calib/mu_w": 0.9459154929577468, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.25390625, "calib/std_conf": 0.12671304221886306, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1178.0, "completions/max_terminated_length": 1178.0, "completions/mean_length": 374.98046875, "completions/mean_terminated_length": 380.93255615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.096, "grad_norm": 0.010848317295312881, "learning_rate": 3.055555555555556e-06, "loss": -0.0062, "num_tokens": 20554866.0, "reward": 1.616170883178711, "reward_std": 0.21723613142967224, "rewards/accuracy_reward_step": 0.72265625, "rewards/brier_reward_group": 0.8079821467399597, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7817014455795288, "step": 90 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.524192425508215, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.3061172549019607, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9686274509803922, "calib/gap": 0.02881906850459459, "calib/mean_conf": 0.9767054901960786, "calib/mu_c": 0.9861988304093566, "calib/mu_w": 0.957379761904762, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3061172549019607, "calib/std_conf": 0.08316363016140202, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1454.0, "completions/max_terminated_length": 1454.0, "completions/mean_length": 411.9609375, "completions/mean_terminated_length": 418.5000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.09706666666666666, "grad_norm": 0.03383542597293854, "learning_rate": 3.0277777777777776e-06, "loss": -0.0261, "num_tokens": 20768040.0, "reward": 1.5384074449539185, "reward_std": 0.266568124294281, "rewards/accuracy_reward_step": 0.66796875, "rewards/brier_reward_group": 0.761809766292572, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7355697154998779, "step": 91 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5954464285714286, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.18816406250000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.93359375, "calib/gap": 0.06655, "calib/mean_conf": 0.9682421875000001, "calib/mu_c": 0.9828, "calib/mu_w": 0.91625, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.187578125, "calib/std_conf": 0.08356109722960106, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 846.0, "completions/max_terminated_length": 846.0, "completions/mean_length": 339.3046875, "completions/mean_terminated_length": 344.69049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.09813333333333334, "grad_norm": 0.009851209819316864, "learning_rate": 3e-06, "loss": 0.0064, "num_tokens": 20961622.0, "reward": 1.707207441329956, "reward_std": 0.19086013734340668, "rewards/accuracy_reward_step": 0.78125, "rewards/brier_reward_group": 0.8576591610908508, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8617957234382629, "step": 92 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5352112676056338, "calib/avg_num_step_conf": 0.96875, "calib/ece": 0.2628346456692914, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9803149606299213, "calib/gap": 0.023943661971830954, "calib/mean_conf": 0.9833070866141732, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9660563380281688, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.2628346456692914, "calib/std_conf": 0.04723129747943542, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2236.0, "completions/max_terminated_length": 2236.0, "completions/mean_length": 374.01953125, "completions/mean_terminated_length": 379.95635986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.0992, "grad_norm": 0.0120620746165514, "learning_rate": 2.9722222222222225e-06, "loss": 0.0069, "num_tokens": 21163147.0, "reward": 1.5767902135849, "reward_std": 0.1930733025074005, "rewards/accuracy_reward_step": 0.71484375, "rewards/brier_reward_group": 0.7593777179718018, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7509081363677979, "step": 93 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5295732838589982, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.2861111111111111, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9761904761904762, "calib/gap": 0.024550649350649545, "calib/mean_conf": 0.9805555555555555, "calib/mu_c": 0.9880571428571426, "calib/mu_w": 0.9635064935064931, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2861111111111111, "calib/std_conf": 0.06498236092233464, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2579.0, "completions/max_terminated_length": 2579.0, "completions/mean_length": 353.75390625, "completions/mean_terminated_length": 360.8008117675781, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.10026666666666667, "grad_norm": 0.009626048617064953, "learning_rate": 2.944444444444445e-06, "loss": 0.0035, "num_tokens": 21362388.0, "reward": 1.5426125526428223, "reward_std": 0.1176624447107315, "rewards/accuracy_reward_step": 0.68359375, "rewards/brier_reward_group": 0.7276296615600586, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7553207278251648, "step": 94 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6207837301587301, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.20176470588235298, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9254901960784314, "calib/gap": 0.13117807539682513, "calib/mean_conf": 0.953372549019608, "calib/mu_c": 0.98578125, "calib/mu_w": 0.8546031746031749, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2010980392156863, "calib/std_conf": 0.14701339952058975, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2306.0, "completions/max_terminated_length": 2306.0, "completions/mean_length": 422.109375, "completions/mean_terminated_length": 427.1146545410156, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.10133333333333333, "grad_norm": 0.010757964104413986, "learning_rate": 2.916666666666667e-06, "loss": -0.0067, "num_tokens": 21576576.0, "reward": 1.6540206670761108, "reward_std": 0.19147750735282898, "rewards/accuracy_reward_step": 0.75, "rewards/brier_reward_group": 0.84278404712677, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8045485019683838, "step": 95 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5908581899623333, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.16324218749999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.95703125, "calib/gap": 0.059284332688587904, "calib/mean_conf": 0.9745703125000001, "calib/mu_c": 0.9854545454545455, "calib/mu_w": 0.9261702127659576, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16070312499999997, "calib/std_conf": 0.07516005667009801, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1115.0, "completions/max_terminated_length": 1115.0, "completions/mean_length": 335.27734375, "completions/mean_terminated_length": 340.5992126464844, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.1024, "grad_norm": 0.009875332936644554, "learning_rate": 2.888888888888889e-06, "loss": 0.0047, "num_tokens": 21768223.0, "reward": 1.7429949045181274, "reward_std": 0.11069345474243164, "rewards/accuracy_reward_step": 0.81640625, "rewards/brier_reward_group": 0.8624937534332275, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8516731262207031, "step": 96 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4899955203822607, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2894509803921569, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9725490196078431, "calib/gap": -0.01741302075556217, "calib/mean_conf": 0.9734509803921568, "calib/mu_c": 0.9683977900552485, "calib/mu_w": 0.9858108108108107, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.27654901960784317, "calib/std_conf": 0.11195172519237315, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 864.0, "completions/max_terminated_length": 864.0, "completions/mean_length": 347.234375, "completions/mean_terminated_length": 352.7460632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.10346666666666667, "grad_norm": 0.017515143379569054, "learning_rate": 2.861111111111111e-06, "loss": 0.024, "num_tokens": 21962187.0, "reward": 1.5750223398208618, "reward_std": 0.22100487351417542, "rewards/accuracy_reward_step": 0.70703125, "rewards/brier_reward_group": 0.7652446031570435, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7301572561264038, "step": 97 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5108540231379176, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.37219607843137265, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9725490196078431, "calib/gap": 0.00457818796308318, "calib/mean_conf": 0.9789411764705883, "calib/mu_c": 0.980700636942675, "calib/mu_w": 0.9761224489795918, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.36772549019607853, "calib/std_conf": 0.07737897869391891, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1349.0, "completions/max_terminated_length": 1349.0, "completions/mean_length": 401.78515625, "completions/mean_terminated_length": 408.1627197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.10453333333333334, "grad_norm": 0.015487849712371826, "learning_rate": 2.8333333333333335e-06, "loss": -0.0141, "num_tokens": 22171228.0, "reward": 1.4574542045593262, "reward_std": 0.2990908622741699, "rewards/accuracy_reward_step": 0.61328125, "rewards/brier_reward_group": 0.719641923904419, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6726752519607544, "step": 98 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5323135635635636, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.4135294117647059, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9647058823529412, "calib/gap": 0.02889076576576588, "calib/mean_conf": 0.9705490196078431, "calib/mu_c": 0.9831249999999999, "calib/mu_w": 0.954234234234234, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.40968627450980394, "calib/std_conf": 0.11679583556326477, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2163.0, "completions/max_terminated_length": 2163.0, "completions/mean_length": 452.46484375, "completions/mean_terminated_length": 457.8300476074219, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.1056, "grad_norm": 0.011346619576215744, "learning_rate": 2.805555555555556e-06, "loss": -0.005, "num_tokens": 22392859.0, "reward": 1.381075382232666, "reward_std": 0.31315329670906067, "rewards/accuracy_reward_step": 0.5625, "rewards/brier_reward_group": 0.6842589378356934, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.5978550314903259, "step": 99 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.499829966673468, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.33230468750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.98828125, "calib/gap": -0.0015949126028699245, "calib/mean_conf": 0.9850390625000001, "calib/mu_c": 0.9844970414201183, "calib/mu_w": 0.9860919540229882, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.3285937500000001, "calib/std_conf": 0.05423471880743085, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1726.0, "completions/max_terminated_length": 1726.0, "completions/mean_length": 419.6015625, "completions/mean_terminated_length": 426.2619323730469, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.10666666666666667, "grad_norm": 0.011520988307893276, "learning_rate": 2.7777777777777783e-06, "loss": -0.0082, "num_tokens": 22607685.0, "reward": 1.5046149492263794, "reward_std": 0.26636022329330444, "rewards/accuracy_reward_step": 0.66015625, "rewards/brier_reward_group": 0.7232612371444702, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.717073380947113, "step": 100 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.3298437500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.3306690738754696e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3298437500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 424.71875, "completions/mean_terminated_length": 431.4603271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.10773333333333333, "grad_norm": 0.009321141988039017, "learning_rate": 2.7500000000000004e-06, "loss": -0.0234, "num_tokens": 22823405.0, "reward": 1.5158978700637817, "reward_std": 0.21507184207439423, "rewards/accuracy_reward_step": 0.66015625, "rewards/brier_reward_group": 0.734685480594635, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6960934400558472, "step": 101 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2478125000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -3.3306690738754696e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2478125000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 965.0, "completions/max_terminated_length": 965.0, "completions/mean_length": 328.71484375, "completions/mean_terminated_length": 333.93255615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.1088, "grad_norm": 0.01641284115612507, "learning_rate": 2.7222222222222224e-06, "loss": -0.0036, "num_tokens": 23014252.0, "reward": 1.6270155906677246, "reward_std": 0.12798628211021423, "rewards/accuracy_reward_step": 0.7421875, "rewards/brier_reward_group": 0.7765922546386719, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7705326080322266, "step": 102 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.3610937500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3610937500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1559.0, "completions/max_terminated_length": 1559.0, "completions/mean_length": 433.1640625, "completions/mean_terminated_length": 440.0397033691406, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.10986666666666667, "grad_norm": 0.023044515401124954, "learning_rate": 2.6944444444444444e-06, "loss": -0.0256, "num_tokens": 23229694.0, "reward": 1.4751359224319458, "reward_std": 0.16185157001018524, "rewards/accuracy_reward_step": 0.62890625, "rewards/brier_reward_group": 0.693356990814209, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6915618777275085, "step": 103 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.4235937500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.3306690738754696e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4235937500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 965.0, "completions/max_terminated_length": 965.0, "completions/mean_length": 412.09765625, "completions/mean_terminated_length": 418.638916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.11093333333333333, "grad_norm": 0.022061089053750038, "learning_rate": 2.666666666666667e-06, "loss": 0.0253, "num_tokens": 23441871.0, "reward": 1.3992860317230225, "reward_std": 0.2274901121854782, "rewards/accuracy_reward_step": 0.56640625, "rewards/brier_reward_group": 0.6506617069244385, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6808575987815857, "step": 104 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5071428571428571, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.2647430830039527, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960474308300395, "calib/gap": 0.006999999999999895, "calib/mean_conf": 0.9880632411067194, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9829999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2647430830039527, "calib/std_conf": 0.030745094286679438, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2493.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 427.90234375, "completions/mean_terminated_length": 436.4263000488281, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.112, "grad_norm": 0.009036574512720108, "learning_rate": 2.6388888888888893e-06, "loss": 0.0088, "num_tokens": 23657174.0, "reward": 1.5890259742736816, "reward_std": 0.17864654958248138, "rewards/accuracy_reward_step": 0.71484375, "rewards/brier_reward_group": 0.7721549272537231, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7480111122131348, "step": 105 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5052631578947369, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.3601181102362204, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": 0.010421052631579109, "calib/mean_conf": 0.9861023622047244, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9795789473684207, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3601181102362204, "calib/std_conf": 0.0619957243440197, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2409.0, "completions/max_terminated_length": 2409.0, "completions/mean_length": 441.67578125, "completions/mean_terminated_length": 446.9130554199219, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.11306666666666666, "grad_norm": 0.007855425588786602, "learning_rate": 2.6111111111111113e-06, "loss": -0.0065, "num_tokens": 23874827.0, "reward": 1.466196894645691, "reward_std": 0.19247515499591827, "rewards/accuracy_reward_step": 0.62109375, "rewards/brier_reward_group": 0.6923063397407532, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7115440368652344, "step": 106 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2751562500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2751562500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1012.0, "completions/max_terminated_length": 1012.0, "completions/mean_length": 397.40625, "completions/mean_terminated_length": 403.71429443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.11413333333333334, "grad_norm": 0.021294519305229187, "learning_rate": 2.5833333333333337e-06, "loss": -0.0191, "num_tokens": 24081179.0, "reward": 1.6048345565795898, "reward_std": 0.22603082656860352, "rewards/accuracy_reward_step": 0.71484375, "rewards/brier_reward_group": 0.7892944812774658, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.770668625831604, "step": 107 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.2087500000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2087500000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1692.0, "completions/max_terminated_length": 1692.0, "completions/mean_length": 437.31640625, "completions/mean_terminated_length": 444.2579650878906, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1152, "grad_norm": 0.07678169012069702, "learning_rate": 2.5555555555555557e-06, "loss": -0.003, "num_tokens": 24296364.0, "reward": 1.6970703601837158, "reward_std": 0.17200732231140137, "rewards/accuracy_reward_step": 0.78125, "rewards/brier_reward_group": 0.8411890864372253, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8220928311347961, "step": 108 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5045454545454545, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.42287401574803163, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00045454545454559625, "calib/mean_conf": 0.9898031496062993, "calib/mu_c": 0.99, "calib/mu_w": 0.9895454545454544, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.42287401574803163, "calib/std_conf": 0.0031310971890919055, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2177.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 448.69921875, "completions/mean_terminated_length": 455.8214416503906, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.11626666666666667, "grad_norm": 0.035480234771966934, "learning_rate": 2.5277777777777778e-06, "loss": -0.0236, "num_tokens": 24515831.0, "reward": 1.3797364234924316, "reward_std": 0.21544721722602844, "rewards/accuracy_reward_step": 0.5625, "rewards/brier_reward_group": 0.6426604986190796, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6419100761413574, "step": 109 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.3037254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3037254901960784, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2203.0, "completions/max_terminated_length": 2203.0, "completions/mean_length": 392.36328125, "completions/mean_terminated_length": 397.0158386230469, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.11733333333333333, "grad_norm": 0.009675406850874424, "learning_rate": 2.5e-06, "loss": -0.0201, "num_tokens": 24721196.0, "reward": 1.5546002388000488, "reward_std": 0.3171882629394531, "rewards/accuracy_reward_step": 0.68359375, "rewards/brier_reward_group": 0.774333119392395, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7487553954124451, "step": 110 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.512987012987013, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.2879215686274511, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.013376623376623709, "calib/mean_conf": 0.9859607843137255, "calib/mu_c": 0.99, "calib/mu_w": 0.9766233766233763, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2879215686274511, "calib/std_conf": 0.06191517308550772, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2337.0, "completions/max_terminated_length": 2337.0, "completions/mean_length": 445.23828125, "completions/mean_terminated_length": 452.3055725097656, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.1184, "grad_norm": 0.009541746228933334, "learning_rate": 2.4722222222222226e-06, "loss": 0.0303, "num_tokens": 24942585.0, "reward": 1.5594208240509033, "reward_std": 0.22527828812599182, "rewards/accuracy_reward_step": 0.6953125, "rewards/brier_reward_group": 0.7607840895652771, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7347120046615601, "step": 111 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.96484375, "calib/ece": 0.3193172690763053, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.3193172690763053, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2270.0, "completions/max_terminated_length": 2270.0, "completions/mean_length": 499.984375, "completions/mean_terminated_length": 509.9442443847656, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.11946666666666667, "grad_norm": 0.011667572893202305, "learning_rate": 2.4444444444444447e-06, "loss": -0.0236, "num_tokens": 25178501.0, "reward": 1.4918447732925415, "reward_std": 0.2304956316947937, "rewards/accuracy_reward_step": 0.65234375, "rewards/brier_reward_group": 0.712691605091095, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7156248688697815, "step": 112 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.3128346456692913, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3128346456692913, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 431.53125, "completions/mean_terminated_length": 438.3809814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.12053333333333334, "grad_norm": 0.01603376492857933, "learning_rate": 2.4166666666666667e-06, "loss": -0.0056, "num_tokens": 25394173.0, "reward": 1.5291264057159424, "reward_std": 0.18222017586231232, "rewards/accuracy_reward_step": 0.671875, "rewards/brier_reward_group": 0.7347449064254761, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7176977396011353, "step": 113 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.2009375000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2009375000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1130.0, "completions/max_terminated_length": 1130.0, "completions/mean_length": 419.265625, "completions/mean_terminated_length": 425.920654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.1216, "grad_norm": 0.007103959564119577, "learning_rate": 2.388888888888889e-06, "loss": -0.01, "num_tokens": 25606529.0, "reward": 1.7007160186767578, "reward_std": 0.19283781945705414, "rewards/accuracy_reward_step": 0.7890625, "rewards/brier_reward_group": 0.8586108684539795, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7880029678344727, "step": 114 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.3493750000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3493750000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1048.0, "completions/max_terminated_length": 1048.0, "completions/mean_length": 407.21484375, "completions/mean_terminated_length": 413.6785888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.12266666666666666, "grad_norm": 0.04583623260259628, "learning_rate": 2.361111111111111e-06, "loss": -0.0278, "num_tokens": 25816040.0, "reward": 1.4954776763916016, "reward_std": 0.2661696672439575, "rewards/accuracy_reward_step": 0.640625, "rewards/brier_reward_group": 0.7326734662055969, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6867371797561646, "step": 115 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2400000000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2400000000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1088.0, "completions/max_terminated_length": 1088.0, "completions/mean_length": 467.328125, "completions/mean_terminated_length": 474.7460632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.12373333333333333, "grad_norm": 0.024667803198099136, "learning_rate": 2.3333333333333336e-06, "loss": -0.0079, "num_tokens": 26040196.0, "reward": 1.6541872024536133, "reward_std": 0.1952700912952423, "rewards/accuracy_reward_step": 0.75, "rewards/brier_reward_group": 0.8135454058647156, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8110154867172241, "step": 116 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.3650000000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3650000000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1099.0, "completions/max_terminated_length": 1099.0, "completions/mean_length": 431.20703125, "completions/mean_terminated_length": 438.0516052246094, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.1248, "grad_norm": 0.022631388157606125, "learning_rate": 2.305555555555556e-06, "loss": 0.0036, "num_tokens": 26257185.0, "reward": 1.4794007539749146, "reward_std": 0.2126893401145935, "rewards/accuracy_reward_step": 0.625, "rewards/brier_reward_group": 0.7046656608581543, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7129372954368591, "step": 117 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.504869684499314, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.35063492063492074, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.996031746031746, "calib/gap": -0.005358024691357932, "calib/mean_conf": 0.9855555555555555, "calib/mu_c": 0.983641975308642, "calib/mu_w": 0.9889999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.3466666666666668, "calib/std_conf": 0.06242526395816772, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1703.0, "completions/max_terminated_length": 1703.0, "completions/mean_length": 480.93359375, "completions/mean_terminated_length": 490.5139465332031, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.12586666666666665, "grad_norm": 0.012477186508476734, "learning_rate": 2.277777777777778e-06, "loss": -0.0044, "num_tokens": 26484312.0, "reward": 1.4674220085144043, "reward_std": 0.29449427127838135, "rewards/accuracy_reward_step": 0.6328125, "rewards/brier_reward_group": 0.7023806571960449, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.6907451152801514, "step": 118 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.3403937007874016, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3403937007874016, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2472.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 498.09765625, "completions/mean_terminated_length": 506.0039978027344, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.12693333333333334, "grad_norm": 0.009062022902071476, "learning_rate": 2.25e-06, "loss": -0.0133, "num_tokens": 26716889.0, "reward": 1.4937986135482788, "reward_std": 0.28233009576797485, "rewards/accuracy_reward_step": 0.64453125, "rewards/brier_reward_group": 0.7360901236534119, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6844169497489929, "step": 119 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.09765625, "calib/ece": 0.23409448818897638, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23409448818897638, "calib/std_conf": 0.0, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1648.0, "completions/max_terminated_length": 1648.0, "completions/mean_length": 462.0859375, "completions/mean_terminated_length": 469.420654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.128, "grad_norm": 0.032117150723934174, "learning_rate": 2.222222222222222e-06, "loss": -0.0033, "num_tokens": 26941871.0, "reward": 1.6441919803619385, "reward_std": 0.1847691833972931, "rewards/accuracy_reward_step": 0.75, "rewards/brier_reward_group": 0.8027962446212769, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.789596438407898, "step": 120 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5057471264367817, "calib/avg_num_step_conf": 1.2890625, "calib/ece": 0.30772509803921566, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00011436781609175384, "calib/mean_conf": 0.9900780392156863, "calib/mu_c": 0.9901143678160917, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.30772509803921566, "calib/std_conf": 0.0008777355437354933, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 527.12890625, "completions/mean_terminated_length": 535.4960327148438, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.12906666666666666, "grad_norm": 0.11755120009183884, "learning_rate": 2.1944444444444445e-06, "loss": 0.0216, "num_tokens": 27181872.0, "reward": 1.556952714920044, "reward_std": 0.24504821002483368, "rewards/accuracy_reward_step": 0.6796875, "rewards/brier_reward_group": 0.7592884302139282, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7575849890708923, "step": 121 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49021639042357273, "calib/avg_num_step_conf": 1.59375, "calib/ece": 0.2728458498023714, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9920948616600791, "calib/gap": 0.01103974831184762, "calib/mean_conf": 0.9847035573122529, "calib/mu_c": 0.9878453038674031, "calib/mu_w": 0.9768055555555555, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2710671936758892, "calib/std_conf": 0.06793110805728833, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2496.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 538.4375, "completions/mean_terminated_length": 549.1633911132812, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.13013333333333332, "grad_norm": 0.009088210761547089, "learning_rate": 2.166666666666667e-06, "loss": -0.0168, "num_tokens": 27427056.0, "reward": 1.5797841548919678, "reward_std": 0.24107559025287628, "rewards/accuracy_reward_step": 0.70703125, "rewards/brier_reward_group": 0.7796617150306702, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7425999641418457, "step": 122 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48445945945945945, "calib/avg_num_step_conf": 1.7890625, "calib/ece": 0.2819291338582677, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0003108108108108576, "calib/mean_conf": 0.9905905511811023, "calib/mu_c": 0.9904999999999998, "calib/mu_w": 0.9908108108108107, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2819291338582677, "calib/std_conf": 0.0023572783275468846, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 568.74609375, "completions/mean_terminated_length": 577.7738647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.1312, "grad_norm": 0.011878207325935364, "learning_rate": 2.138888888888889e-06, "loss": -0.0138, "num_tokens": 27677943.0, "reward": 1.5732364654541016, "reward_std": 0.26530539989471436, "rewards/accuracy_reward_step": 0.703125, "rewards/brier_reward_group": 0.7894773483276367, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7144062519073486, "step": 123 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.53125, "calib/avg_num_step_conf": 1.88671875, "calib/ece": 0.24046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006249999999999867, "calib/mean_conf": 0.99046875, "calib/mu_c": 0.9906250000000001, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24046875, "calib/std_conf": 0.002113710821635734, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1323.0, "completions/max_terminated_length": 1323.0, "completions/mean_length": 527.828125, "completions/mean_terminated_length": 538.3426513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.13226666666666667, "grad_norm": 0.13055697083473206, "learning_rate": 2.1111111111111114e-06, "loss": -0.003, "num_tokens": 27919883.0, "reward": 1.6604065895080566, "reward_std": 0.19660840928554535, "rewards/accuracy_reward_step": 0.75, "rewards/brier_reward_group": 0.8212425708770752, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8203842639923096, "step": 124 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45983563096500535, "calib/avg_num_step_conf": 1.9296875, "calib/ece": 0.35027343750000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0008032873806997909, "calib/mean_conf": 0.9908984375000001, "calib/mu_c": 0.9906097560975609, "calib/mu_w": 0.9914130434782606, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35027343750000006, "calib/std_conf": 0.0028595777762798766, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1631.0, "completions/max_terminated_length": 1631.0, "completions/mean_length": 542.22265625, "completions/mean_terminated_length": 550.8294067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.13333333333333333, "grad_norm": 0.015504639595746994, "learning_rate": 2.0833333333333334e-06, "loss": 0.0214, "num_tokens": 28163500.0, "reward": 1.4953252077102661, "reward_std": 0.22951993346214294, "rewards/accuracy_reward_step": 0.640625, "rewards/brier_reward_group": 0.7288151979446411, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6899855136871338, "step": 125 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48952496179317373, "calib/avg_num_step_conf": 2.00390625, "calib/ece": 0.398392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0002095007641369495, "calib/mean_conf": 0.990549019607843, "calib/mu_c": 0.99046357615894, "calib/mu_w": 0.9906730769230769, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.398392156862745, "calib/std_conf": 0.002277887957875705, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 585.51171875, "completions/mean_terminated_length": 592.45458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.1344, "grad_norm": 0.010014167055487633, "learning_rate": 2.0555555555555555e-06, "loss": 0.0058, "num_tokens": 28418855.0, "reward": 1.4250679016113281, "reward_std": 0.30080366134643555, "rewards/accuracy_reward_step": 0.58984375, "rewards/brier_reward_group": 0.6859462857246399, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6705750226974487, "step": 126 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4936708860759494, "calib/avg_num_step_conf": 1.9296875, "calib/ece": 0.3100760000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.996, "calib/gap": -0.005804944851580007, "calib/mean_conf": 0.9861560000000001, "calib/mu_c": 0.9843216374269005, "calib/mu_w": 0.9901265822784805, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.30611600000000005, "calib/std_conf": 0.061867177598464926, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 544.22265625, "completions/mean_terminated_length": 552.8611450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.13546666666666668, "grad_norm": 0.011544213630259037, "learning_rate": 2.027777777777778e-06, "loss": 0.0069, "num_tokens": 28661848.0, "reward": 1.5123376846313477, "reward_std": 0.30747634172439575, "rewards/accuracy_reward_step": 0.66796875, "rewards/brier_reward_group": 0.746268630027771, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.6937066316604614, "step": 127 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.95703125, "calib/ece": 0.33523809523809533, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.33523809523809533, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 597.9375, "completions/mean_terminated_length": 605.0277099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.13653333333333334, "grad_norm": 0.00843075942248106, "learning_rate": 2.0000000000000003e-06, "loss": 0.0135, "num_tokens": 28921584.0, "reward": 1.4812567234039307, "reward_std": 0.2827935218811035, "rewards/accuracy_reward_step": 0.64453125, "rewards/brier_reward_group": 0.7225621938705444, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.6946519613265991, "step": 128 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4970414201183432, "calib/avg_num_step_conf": 1.96875, "calib/ece": 0.32862204724409444, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": -0.0058579881656802835, "calib/mean_conf": 0.9861023622047244, "calib/mu_c": 0.9841420118343195, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3246850393700787, "calib/std_conf": 0.06199572434401969, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2199.0, "completions/max_terminated_length": 2199.0, "completions/mean_length": 510.7734375, "completions/mean_terminated_length": 520.9482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1376, "grad_norm": 0.00900917686522007, "learning_rate": 1.9722222222222224e-06, "loss": -0.0236, "num_tokens": 29154726.0, "reward": 1.5183894634246826, "reward_std": 0.26133912801742554, "rewards/accuracy_reward_step": 0.66015625, "rewards/brier_reward_group": 0.737421452999115, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7267614603042603, "step": 129 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5068493150684932, "calib/avg_num_step_conf": 1.95703125, "calib/ece": 0.277244094488189, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005479452054794054, "calib/mean_conf": 0.9898425196850393, "calib/mu_c": 0.99, "calib/mu_w": 0.9894520547945206, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.277244094488189, "calib/std_conf": 0.0025048777512735243, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1967.0, "completions/max_terminated_length": 1967.0, "completions/mean_length": 482.98046875, "completions/mean_terminated_length": 492.60162353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.13866666666666666, "grad_norm": 0.025695854797959328, "learning_rate": 1.944444444444445e-06, "loss": -0.0101, "num_tokens": 29383657.0, "reward": 1.595362663269043, "reward_std": 0.15114884078502655, "rewards/accuracy_reward_step": 0.70703125, "rewards/brier_reward_group": 0.7583874464035034, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8105632066726685, "step": 130 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5076923076923077, "calib/avg_num_step_conf": 2.0859375, "calib/ece": 0.49378906249999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.0079230769230767, "calib/mean_conf": 0.9859765625000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9820769230769232, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.49378906249999993, "calib/std_conf": 0.061794640347554344, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2128.0, "completions/max_terminated_length": 2128.0, "completions/mean_length": 513.6484375, "completions/mean_terminated_length": 521.8016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.13973333333333332, "grad_norm": 0.013012518174946308, "learning_rate": 1.916666666666667e-06, "loss": -0.0065, "num_tokens": 29621359.0, "reward": 1.269148826599121, "reward_std": 0.22060997784137726, "rewards/accuracy_reward_step": 0.4921875, "rewards/brier_reward_group": 0.5776644349098206, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.5301808714866638, "step": 131 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.521974306964165, "calib/avg_num_step_conf": 1.87890625, "calib/ece": 0.18681102362204716, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": 0.02004926108374383, "calib/mean_conf": 0.986023622047244, "calib/mu_c": 0.9900492610837439, "calib/mu_w": 0.9700000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.18681102362204716, "calib/std_conf": 0.062022470472996194, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1954.0, "completions/max_terminated_length": 1954.0, "completions/mean_length": 491.2734375, "completions/mean_terminated_length": 499.07147216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.1408, "grad_norm": 0.008971575647592545, "learning_rate": 1.888888888888889e-06, "loss": -0.014, "num_tokens": 29852717.0, "reward": 1.7127364873886108, "reward_std": 0.2484358251094818, "rewards/accuracy_reward_step": 0.79296875, "rewards/brier_reward_group": 0.870286762714386, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8322217464447021, "step": 132 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49993064225273964, "calib/avg_num_step_conf": 1.84765625, "calib/ece": 0.34844621513944235, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9960159362549801, "calib/gap": -0.005487584963240422, "calib/mean_conf": 0.9858964143426296, "calib/mu_c": 0.9839506172839506, "calib/mu_w": 0.989438202247191, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.34446215139442243, "calib/std_conf": 0.06244274849598392, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2597.0, "completions/max_terminated_length": 2597.0, "completions/mean_length": 625.328125, "completions/mean_terminated_length": 635.2540283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.14186666666666667, "grad_norm": 0.011458762921392918, "learning_rate": 1.8611111111111113e-06, "loss": -0.0191, "num_tokens": 30119145.0, "reward": 1.4716432094573975, "reward_std": 0.3156856596469879, "rewards/accuracy_reward_step": 0.6328125, "rewards/brier_reward_group": 0.7177450656890869, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6844528913497925, "step": 133 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5031847133757962, "calib/avg_num_step_conf": 1.8984375, "calib/ece": 0.3767578125000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 6.369426751584584e-05, "calib/mean_conf": 0.9900390625000002, "calib/mu_c": 0.9900636942675157, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3767578125000002, "calib/std_conf": 0.0006237781024480985, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1810.0, "completions/max_terminated_length": 1810.0, "completions/mean_length": 582.15234375, "completions/mean_terminated_length": 591.3928833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.14293333333333333, "grad_norm": 0.009385544806718826, "learning_rate": 1.8333333333333333e-06, "loss": -0.0148, "num_tokens": 30377128.0, "reward": 1.4562424421310425, "reward_std": 0.3281945586204529, "rewards/accuracy_reward_step": 0.61328125, "rewards/brier_reward_group": 0.7188370823860168, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6764452457427979, "step": 134 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.9375, "calib/ece": 0.28803921568627455, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.28803921568627455, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1595.0, "completions/max_terminated_length": 1595.0, "completions/mean_length": 527.69921875, "completions/mean_terminated_length": 536.075439453125, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.144, "grad_norm": 0.007142702117562294, "learning_rate": 1.8055555555555557e-06, "loss": -0.0229, "num_tokens": 30618099.0, "reward": 1.5674383640289307, "reward_std": 0.22207137942314148, "rewards/accuracy_reward_step": 0.69921875, "rewards/brier_reward_group": 0.7638119459152222, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7246912717819214, "step": 135 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.90625, "calib/ece": 0.395511811023622, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999997, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.395511811023622, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 543.109375, "completions/mean_terminated_length": 551.7301635742188, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.14506666666666668, "grad_norm": 0.009068154729902744, "learning_rate": 1.777777777777778e-06, "loss": -0.0087, "num_tokens": 30865623.0, "reward": 1.410581350326538, "reward_std": 0.24024391174316406, "rewards/accuracy_reward_step": 0.58984375, "rewards/brier_reward_group": 0.6730188131332397, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6333686113357544, "step": 136 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.502540650406504, "calib/avg_num_step_conf": 1.96875, "calib/ece": 0.3443700787401575, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9921259842519685, "calib/gap": 0.0050243902439025545, "calib/mean_conf": 0.9822440944881891, "calib/mu_c": 0.9840243902439024, "calib/mu_w": 0.9789999999999999, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.34047244094488194, "calib/std_conf": 0.08706095398753345, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2251.0, "completions/max_terminated_length": 2251.0, "completions/mean_length": 527.01953125, "completions/mean_terminated_length": 533.268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.14613333333333334, "grad_norm": 0.007421621587127447, "learning_rate": 1.75e-06, "loss": -0.0042, "num_tokens": 31107524.0, "reward": 1.4851226806640625, "reward_std": 0.24835412204265594, "rewards/accuracy_reward_step": 0.640625, "rewards/brier_reward_group": 0.7215331792831421, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6955199241638184, "step": 137 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5056818181818181, "calib/avg_num_step_conf": 1.96875, "calib/ece": 0.3349411764705882, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00045454545454515216, "calib/mean_conf": 0.9898431372549019, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9895454545454547, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3349411764705882, "calib/std_conf": 0.00249998077655047, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1179.0, "completions/max_terminated_length": 1179.0, "completions/mean_length": 500.7265625, "completions/mean_terminated_length": 508.67462158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1472, "grad_norm": 0.033796265721321106, "learning_rate": 1.7222222222222224e-06, "loss": -0.0122, "num_tokens": 31340046.0, "reward": 1.5078368186950684, "reward_std": 0.23290802538394928, "rewards/accuracy_reward_step": 0.65234375, "rewards/brier_reward_group": 0.7345907092094421, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6951937675476074, "step": 138 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 2.0078125, "calib/ece": 0.2868750000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2868750000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1035.0, "completions/max_terminated_length": 1035.0, "completions/mean_length": 447.953125, "completions/mean_terminated_length": 455.0635070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.14826666666666666, "grad_norm": 0.00960354134440422, "learning_rate": 1.6944444444444446e-06, "loss": 0.0104, "num_tokens": 31557818.0, "reward": 1.5895135402679443, "reward_std": 0.1642017364501953, "rewards/accuracy_reward_step": 0.703125, "rewards/brier_reward_group": 0.7695093154907227, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7760449647903442, "step": 139 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5064935064935066, "calib/avg_num_step_conf": 2.03515625, "calib/ece": 0.29011764705882354, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.006103896103896389, "calib/mean_conf": 0.988156862745098, "calib/mu_c": 0.99, "calib/mu_w": 0.9838961038961036, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.29011764705882354, "calib/std_conf": 0.029374774124467987, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1857.0, "completions/max_terminated_length": 1857.0, "completions/mean_length": 501.19921875, "completions/mean_terminated_length": 509.15478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.14933333333333335, "grad_norm": 0.0098324716091156, "learning_rate": 1.6666666666666667e-06, "loss": -0.034, "num_tokens": 31791141.0, "reward": 1.5700232982635498, "reward_std": 0.1776837408542633, "rewards/accuracy_reward_step": 0.6953125, "rewards/brier_reward_group": 0.7559343576431274, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7507213950157166, "step": 140 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4930555555555556, "calib/avg_num_step_conf": 2.09765625, "calib/ece": 0.2712890625000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00013888888888868856, "calib/mean_conf": 0.9900390625000002, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9901388888888888, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2712890625000002, "calib/std_conf": 0.0006237781024480986, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1893.0, "completions/max_terminated_length": 1893.0, "completions/mean_length": 527.34375, "completions/mean_terminated_length": 535.7142944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.1504, "grad_norm": 0.008453505113720894, "learning_rate": 1.638888888888889e-06, "loss": 0.0063, "num_tokens": 32033237.0, "reward": 1.6114122867584229, "reward_std": 0.2033512145280838, "rewards/accuracy_reward_step": 0.71875, "rewards/brier_reward_group": 0.783699631690979, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7947620749473572, "step": 141 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 2.11328125, "calib/ece": 0.35862745098039217, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35862745098039217, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1542.0, "completions/max_terminated_length": 1542.0, "completions/mean_length": 562.73828125, "completions/mean_terminated_length": 571.670654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.15146666666666667, "grad_norm": 0.00851828046143055, "learning_rate": 1.6111111111111113e-06, "loss": -0.0085, "num_tokens": 32282458.0, "reward": 1.4801580905914307, "reward_std": 0.24722754955291748, "rewards/accuracy_reward_step": 0.62890625, "rewards/brier_reward_group": 0.7196464538574219, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6931735277175903, "step": 142 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5028571428571429, "calib/avg_num_step_conf": 2.265625, "calib/ece": 0.3037647058823528, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.7142857142644665e-05, "calib/mean_conf": 0.9900392156862744, "calib/mu_c": 0.9900571428571426, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3037647058823528, "calib/std_conf": 0.0006249951941376173, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1852.0, "completions/max_terminated_length": 1852.0, "completions/mean_length": 517.73046875, "completions/mean_terminated_length": 525.9484252929688, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.15253333333333333, "grad_norm": 0.008409008383750916, "learning_rate": 1.5833333333333333e-06, "loss": 0.0108, "num_tokens": 32522333.0, "reward": 1.5461618900299072, "reward_std": 0.19538016617298126, "rewards/accuracy_reward_step": 0.68359375, "rewards/brier_reward_group": 0.7507796883583069, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.707305371761322, "step": 143 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 2.3828125, "calib/ece": 0.2243750000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000002, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2243750000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1712.0, "completions/max_terminated_length": 1712.0, "completions/mean_length": 517.0703125, "completions/mean_terminated_length": 525.27783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.1536, "grad_norm": 0.01105842087417841, "learning_rate": 1.5555555555555558e-06, "loss": 0.0083, "num_tokens": 32758831.0, "reward": 1.6755250692367554, "reward_std": 0.2170555740594864, "rewards/accuracy_reward_step": 0.765625, "rewards/brier_reward_group": 0.8405249714851379, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7990751266479492, "step": 144 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5026041666666666, "calib/avg_num_step_conf": 2.5546875, "calib/ece": 0.2311462450592887, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.2083333333285964e-05, "calib/mean_conf": 0.9900395256916997, "calib/mu_c": 0.9900520833333335, "calib/mu_w": 0.9900000000000002, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2311462450592887, "calib/std_conf": 0.0006274509038097848, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2577.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 524.8125, "completions/mean_terminated_length": 533.1428833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.15466666666666667, "grad_norm": 0.014037779532372952, "learning_rate": 1.527777777777778e-06, "loss": -0.0081, "num_tokens": 32995887.0, "reward": 1.6281208992004395, "reward_std": 0.23964989185333252, "rewards/accuracy_reward_step": 0.75, "rewards/brier_reward_group": 0.8128194212913513, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7309138774871826, "step": 145 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 2.83203125, "calib/ece": 0.4039453125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00028301886792436726, "calib/mean_conf": 0.9898828125000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9897169811320754, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4039453125000001, "calib/std_conf": 0.002574275061030535, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1686.0, "completions/max_terminated_length": 1686.0, "completions/mean_length": 540.68359375, "completions/mean_terminated_length": 549.2659301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.15573333333333333, "grad_norm": 0.012108377180993557, "learning_rate": 1.5e-06, "loss": 0.0099, "num_tokens": 33241518.0, "reward": 1.417668104171753, "reward_std": 0.2301420271396637, "rewards/accuracy_reward_step": 0.5859375, "rewards/brier_reward_group": 0.6665316224098206, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.660391092300415, "step": 146 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.494949494949495, "calib/avg_num_step_conf": 2.85546875, "calib/ece": 0.37980314960629913, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00010101010101015717, "calib/mean_conf": 0.9900393700787401, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99010101010101, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37980314960629913, "calib/std_conf": 0.0006262194378183811, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2535.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 604.9609375, "completions/mean_terminated_length": 609.7244262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.1568, "grad_norm": 0.01028156653046608, "learning_rate": 1.4722222222222225e-06, "loss": -0.0038, "num_tokens": 33500068.0, "reward": 1.4331023693084717, "reward_std": 0.25268805027008057, "rewards/accuracy_reward_step": 0.60546875, "rewards/brier_reward_group": 0.6855310201644897, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6406280994415283, "step": 147 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 2.9609375, "calib/ece": 0.20259842519685034, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -3.3306690738754696e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20259842519685034, "calib/std_conf": 0.0, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1582.0, "completions/max_terminated_length": 1582.0, "completions/mean_length": 510.2734375, "completions/mean_terminated_length": 518.373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.15786666666666666, "grad_norm": 0.007744878530502319, "learning_rate": 1.4444444444444445e-06, "loss": -0.0203, "num_tokens": 33735810.0, "reward": 1.6874488592147827, "reward_std": 0.16425636410713196, "rewards/accuracy_reward_step": 0.78125, "rewards/brier_reward_group": 0.8287118077278137, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8117086887359619, "step": 148 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5058139534883721, "calib/avg_num_step_conf": 3.01953125, "calib/ece": 0.31023320158102774, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00011046511627899047, "calib/mean_conf": 0.9900750988142293, "calib/mu_c": 0.990110465116279, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.31023320158102774, "calib/std_conf": 0.0008424815652963619, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2482.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 603.95703125, "completions/mean_terminated_length": 613.543701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.15893333333333334, "grad_norm": 0.01476355828344822, "learning_rate": 1.4166666666666667e-06, "loss": -0.0105, "num_tokens": 33994879.0, "reward": 1.5313372611999512, "reward_std": 0.222749263048172, "rewards/accuracy_reward_step": 0.671875, "rewards/brier_reward_group": 0.7424355149269104, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7266632318496704, "step": 149 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5071428571428571, "calib/avg_num_step_conf": 3.2578125, "calib/ece": 0.26407114624505945, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9960474308300395, "calib/gap": 0.009428571428571342, "calib/mean_conf": 0.9873913043478261, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9805714285714284, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.26407114624505945, "calib/std_conf": 0.041411759651445765, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2445.0, "completions/max_terminated_length": 2445.0, "completions/mean_length": 499.72265625, "completions/mean_terminated_length": 507.65478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.16, "grad_norm": 0.011728521436452866, "learning_rate": 1.3888888888888892e-06, "loss": -0.0128, "num_tokens": 34227768.0, "reward": 1.5932334661483765, "reward_std": 0.1388700008392334, "rewards/accuracy_reward_step": 0.71484375, "rewards/brier_reward_group": 0.7603456974029541, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7844633460044861, "step": 150 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5115467239527389, "calib/avg_num_step_conf": 3.5546875, "calib/ece": 0.38100000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.996, "calib/gap": 0.0028866809881847866, "calib/mean_conf": 0.989, "calib/mu_c": 0.9901315789473685, "calib/mu_w": 0.9872448979591837, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.38100000000000006, "calib/std_conf": 0.015394804318340654, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2671.0, "completions/max_terminated_length": 2671.0, "completions/mean_length": 595.3828125, "completions/mean_terminated_length": 607.2430419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.16106666666666666, "grad_norm": 0.009798677638173103, "learning_rate": 1.3611111111111112e-06, "loss": -0.0394, "num_tokens": 34487210.0, "reward": 1.403533935546875, "reward_std": 0.20775704085826874, "rewards/accuracy_reward_step": 0.59375, "rewards/brier_reward_group": 0.6569827198982239, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.6446533203125, "step": 151 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5027472527472527, "calib/avg_num_step_conf": 3.76171875, "calib/ece": 0.273503937007874, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.494505494485491e-05, "calib/mean_conf": 0.9900393700787401, "calib/mu_c": 0.9900549450549448, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.273503937007874, "calib/std_conf": 0.0006262194378183811, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2473.0, "completions/max_terminated_length": 2473.0, "completions/mean_length": 586.57421875, "completions/mean_terminated_length": 595.8849487304688, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.16213333333333332, "grad_norm": 0.010811089538037777, "learning_rate": 1.3333333333333334e-06, "loss": 0.028, "num_tokens": 34742765.0, "reward": 1.5861300230026245, "reward_std": 0.19278180599212646, "rewards/accuracy_reward_step": 0.7109375, "rewards/brier_reward_group": 0.7697770595550537, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7544306516647339, "step": 152 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 3.9609375, "calib/ece": 0.30889763779527557, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30889763779527557, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2382.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 558.609375, "completions/mean_terminated_length": 567.4761962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.1632, "grad_norm": 0.014793137088418007, "learning_rate": 1.3055555555555556e-06, "loss": -0.0089, "num_tokens": 34993089.0, "reward": 1.54752779006958, "reward_std": 0.23927220702171326, "rewards/accuracy_reward_step": 0.67578125, "rewards/brier_reward_group": 0.7510262727737427, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7515851259231567, "step": 153 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5150375939849624, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.46027888446215137, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0003007518796993569, "calib/mean_conf": 0.9901593625498007, "calib/mu_c": 0.9903007518796991, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.46027888446215137, "calib/std_conf": 0.0012522895335061134, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 573.69140625, "completions/mean_terminated_length": 587.4600219726562, "completions/min_length": 0.0, "completions/min_terminated_length": 244.0, "epoch": 0.16426666666666667, "grad_norm": 0.010138453915715218, "learning_rate": 1.2777777777777779e-06, "loss": -0.0437, "num_tokens": 35244394.0, "reward": 1.3009169101715088, "reward_std": 0.2400965690612793, "rewards/accuracy_reward_step": 0.51953125, "rewards/brier_reward_group": 0.5758287906646729, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.5887768268585205, "step": 154 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.50274467800241, "calib/avg_num_step_conf": 4.91796875, "calib/ece": 0.37669243027888444, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.393626991567757e-05, "calib/mean_conf": 0.9902382470119522, "calib/mu_c": 0.9902590909090909, "calib/mu_w": 0.9902051546391752, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.37669243027888444, "calib/std_conf": 0.0015224388442898556, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 527.0703125, "completions/mean_terminated_length": 539.7200317382812, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.16533333333333333, "grad_norm": 0.010918740183115005, "learning_rate": 1.25e-06, "loss": -0.0585, "num_tokens": 35486540.0, "reward": 1.4296875, "reward_std": 0.29709577560424805, "rewards/accuracy_reward_step": 0.6015625, "rewards/brier_reward_group": 0.6891999244689941, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.662362277507782, "step": 155 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5087719298245614, "calib/avg_num_step_conf": 4.203125, "calib/ece": 0.30337349397590374, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00017543859649149507, "calib/mean_conf": 0.990120481927711, "calib/mu_c": 0.9901754385964912, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.30337349397590374, "calib/std_conf": 0.0010910102576069185, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2366.0, "completions/max_terminated_length": 2366.0, "completions/mean_length": 592.9140625, "completions/mean_terminated_length": 609.5823364257812, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.1664, "grad_norm": 0.010354182682931423, "learning_rate": 1.2222222222222223e-06, "loss": -0.0329, "num_tokens": 35743086.0, "reward": 1.5343012809753418, "reward_std": 0.3117218017578125, "rewards/accuracy_reward_step": 0.66796875, "rewards/brier_reward_group": 0.7521328926086426, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7678846120834351, "step": 156 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4898785425101215, "calib/avg_num_step_conf": 4.76171875, "calib/ece": 0.24505882352941166, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00020242914979762272, "calib/mean_conf": 0.990156862745098, "calib/mu_c": 0.9901052631578947, "calib/mu_w": 0.9903076923076923, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24505882352941166, "calib/std_conf": 0.001242586628843519, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 565.06640625, "completions/mean_terminated_length": 574.0357666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.16746666666666668, "grad_norm": 0.012160200625658035, "learning_rate": 1.1944444444444446e-06, "loss": -0.0206, "num_tokens": 35991471.0, "reward": 1.6435627937316895, "reward_std": 0.2203359603881836, "rewards/accuracy_reward_step": 0.7421875, "rewards/brier_reward_group": 0.8106563091278076, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8026572465896606, "step": 157 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5133074162679426, "calib/avg_num_step_conf": 4.54296875, "calib/ece": 0.2919047619047621, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00026614832535920563, "calib/mean_conf": 0.9903174603174605, "calib/mu_c": 0.9903977272727272, "calib/mu_w": 0.990131578947368, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2919047619047621, "calib/std_conf": 0.0017532319074900432, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2490.0, "completions/max_terminated_length": 2490.0, "completions/mean_length": 572.046875, "completions/mean_terminated_length": 583.4422607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.16853333333333334, "grad_norm": 0.014418968930840492, "learning_rate": 1.1666666666666668e-06, "loss": 0.0067, "num_tokens": 36243155.0, "reward": 1.5606688261032104, "reward_std": 0.3102324903011322, "rewards/accuracy_reward_step": 0.6875, "rewards/brier_reward_group": 0.786474883556366, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.745262861251831, "step": 158 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49085878397310173, "calib/avg_num_step_conf": 4.14453125, "calib/ece": 0.31560784313725465, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00018282432053784436, "calib/mean_conf": 0.9901176470588233, "calib/mu_c": 0.9900581395348836, "calib/mu_w": 0.9902409638554215, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31560784313725465, "calib/std_conf": 0.0010782531046954929, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1756.0, "completions/max_terminated_length": 1756.0, "completions/mean_length": 545.7421875, "completions/mean_terminated_length": 554.40478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.1696, "grad_norm": 0.010768786072731018, "learning_rate": 1.138888888888889e-06, "loss": -0.0014, "num_tokens": 36487649.0, "reward": 1.557401418685913, "reward_std": 0.24081464111804962, "rewards/accuracy_reward_step": 0.671875, "rewards/brier_reward_group": 0.7535665035247803, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8041641712188721, "step": 159 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.03515625, "calib/ece": 0.31539682539682545, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.31539682539682545, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2281.0, "completions/max_terminated_length": 2281.0, "completions/mean_length": 534.82421875, "completions/mean_terminated_length": 545.4780883789062, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.17066666666666666, "grad_norm": 0.008867080323398113, "learning_rate": 1.111111111111111e-06, "loss": -0.0248, "num_tokens": 36729404.0, "reward": 1.5163869857788086, "reward_std": 0.2396489679813385, "rewards/accuracy_reward_step": 0.6640625, "rewards/brier_reward_group": 0.727623701095581, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7129243612289429, "step": 160 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4668008048289739, "calib/avg_num_step_conf": 3.90625, "calib/ece": 0.15486156862745082, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0006601945003353871, "calib/mean_conf": 0.9901556862745097, "calib/mu_c": 0.9900469483568075, "calib/mu_w": 0.9907071428571429, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15486156862745082, "calib/std_conf": 0.0012332791533970614, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 511.58203125, "completions/mean_terminated_length": 517.6482543945312, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.17173333333333332, "grad_norm": 0.00983340572565794, "learning_rate": 1.0833333333333335e-06, "loss": 0.0084, "num_tokens": 36964289.0, "reward": 1.7533671855926514, "reward_std": 0.14535510540008545, "rewards/accuracy_reward_step": 0.83203125, "rewards/brier_reward_group": 0.8738914728164673, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8270772695541382, "step": 161 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 3.86328125, "calib/ece": 0.182156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.182156862745098, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 478.39453125, "completions/mean_terminated_length": 485.9881286621094, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.1728, "grad_norm": 0.009535176679491997, "learning_rate": 1.0555555555555557e-06, "loss": 0.0057, "num_tokens": 37190902.0, "reward": 1.7314547300338745, "reward_std": 0.1599610149860382, "rewards/accuracy_reward_step": 0.8046875, "rewards/brier_reward_group": 0.858291506767273, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8565897941589355, "step": 162 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5059171597633136, "calib/avg_num_step_conf": 3.625, "calib/ece": 0.32472440944881886, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00011834319526637938, "calib/mean_conf": 0.9900787401574803, "calib/mu_c": 0.9901183431952661, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32472440944881886, "calib/std_conf": 0.0008838560756158924, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2110.0, "completions/max_terminated_length": 2110.0, "completions/mean_length": 556.859375, "completions/mean_terminated_length": 565.6984252929688, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.17386666666666667, "grad_norm": 0.023701557889580727, "learning_rate": 1.0277777777777777e-06, "loss": 0.0024, "num_tokens": 37438290.0, "reward": 1.5212602615356445, "reward_std": 0.2810484766960144, "rewards/accuracy_reward_step": 0.66015625, "rewards/brier_reward_group": 0.7524918913841248, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7075492143630981, "step": 163 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 3.6484375, "calib/ece": 0.3220312500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3220312500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1736.0, "completions/max_terminated_length": 1736.0, "completions/mean_length": 589.0078125, "completions/mean_terminated_length": 598.357177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 0.17493333333333333, "grad_norm": 0.01062087807804346, "learning_rate": 1.0000000000000002e-06, "loss": 0.0031, "num_tokens": 37695212.0, "reward": 1.532461404800415, "reward_std": 0.198165625333786, "rewards/accuracy_reward_step": 0.66796875, "rewards/brier_reward_group": 0.7308089733123779, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7427867650985718, "step": 164 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.50920245398773, "calib/avg_num_step_conf": 3.6796875, "calib/ece": 0.3509019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00018404907975455576, "calib/mean_conf": 0.9901176470588234, "calib/mu_c": 0.9901840490797544, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3509019607843137, "calib/std_conf": 0.0010782531046954927, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2239.0, "completions/max_terminated_length": 2239.0, "completions/mean_length": 581.88671875, "completions/mean_terminated_length": 591.123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.176, "grad_norm": 0.011879962868988514, "learning_rate": 9.722222222222224e-07, "loss": 0.0201, "num_tokens": 37949751.0, "reward": 1.4884181022644043, "reward_std": 0.24818657338619232, "rewards/accuracy_reward_step": 0.63671875, "rewards/brier_reward_group": 0.7207316160202026, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.693878173828125, "step": 165 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 3.76171875, "calib/ece": 0.21322709163346612, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9960159362549801, "calib/gap": 0.017017543859649154, "calib/mean_conf": 0.9861354581673307, "calib/mu_c": 0.99, "calib/mu_w": 0.9729824561403508, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.21322709163346612, "calib/std_conf": 0.061739440745079505, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3036.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 568.79296875, "completions/mean_terminated_length": 575.53759765625, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.17706666666666668, "grad_norm": 0.009490316733717918, "learning_rate": 9.444444444444445e-07, "loss": -0.0207, "num_tokens": 38201546.0, "reward": 1.6525752544403076, "reward_std": 0.2127598077058792, "rewards/accuracy_reward_step": 0.7578125, "rewards/brier_reward_group": 0.8025659322738647, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8155475854873657, "step": 166 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5051282051282051, "calib/avg_num_step_conf": 3.8671875, "calib/ece": 0.21932806324110676, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.000102564102563929, "calib/mean_conf": 0.9900790513833992, "calib/mu_c": 0.990102564102564, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.21932806324110676, "calib/std_conf": 0.0008855872135339169, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2242.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 540.40234375, "completions/mean_terminated_length": 548.9801635742188, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.17813333333333334, "grad_norm": 0.007833111099898815, "learning_rate": 9.166666666666666e-07, "loss": -0.0158, "num_tokens": 38445497.0, "reward": 1.6641058921813965, "reward_std": 0.24750857055187225, "rewards/accuracy_reward_step": 0.76171875, "rewards/brier_reward_group": 0.8293477296829224, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8036386966705322, "step": 167 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 3.75, "calib/ece": 0.2605882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2605882352941177, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2620.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 604.66015625, "completions/mean_terminated_length": 611.830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.1792, "grad_norm": 0.012309842742979527, "learning_rate": 8.88888888888889e-07, "loss": -0.0032, "num_tokens": 38704962.0, "reward": 1.6134165525436401, "reward_std": 0.245762437582016, "rewards/accuracy_reward_step": 0.7265625, "rewards/brier_reward_group": 0.808096170425415, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7471325397491455, "step": 168 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49726402188782487, "calib/avg_num_step_conf": 3.90625, "calib/ece": 0.32625000000000015, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -5.471956224323282e-05, "calib/mean_conf": 0.9903125000000002, "calib/mu_c": 0.9902941176470588, "calib/mu_w": 0.990348837209302, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32625000000000015, "calib/std_conf": 0.0017399263633843835, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1619.0, "completions/max_terminated_length": 1619.0, "completions/mean_length": 545.78515625, "completions/mean_terminated_length": 554.4484252929688, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.18026666666666666, "grad_norm": 0.010852995328605175, "learning_rate": 8.611111111111112e-07, "loss": -0.0029, "num_tokens": 38948867.0, "reward": 1.5293468236923218, "reward_std": 0.18101638555526733, "rewards/accuracy_reward_step": 0.6640625, "rewards/brier_reward_group": 0.7271101474761963, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7340270280838013, "step": 169 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5129554655870445, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.24125490196078422, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.01534008097165962, "calib/mean_conf": 0.9863529411764705, "calib/mu_c": 0.9902631578947367, "calib/mu_w": 0.9749230769230771, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24125490196078422, "calib/std_conf": 0.06190784534191844, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1213.0, "completions/max_terminated_length": 1213.0, "completions/mean_length": 551.42578125, "completions/mean_terminated_length": 560.1785888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.18133333333333335, "grad_norm": 0.007006386760622263, "learning_rate": 8.333333333333333e-07, "loss": -0.0061, "num_tokens": 39194184.0, "reward": 1.636326551437378, "reward_std": 0.1704462468624115, "rewards/accuracy_reward_step": 0.7421875, "rewards/brier_reward_group": 0.7999738454818726, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7843947410583496, "step": 170 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5324571883711668, "calib/avg_num_step_conf": 4.20703125, "calib/ece": 0.35533333333333317, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006491437674235323, "calib/mean_conf": 0.990627450980392, "calib/mu_c": 0.9908641975308643, "calib/mu_w": 0.9902150537634408, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35533333333333317, "calib/std_conf": 0.0024250391896063245, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 539.25, "completions/mean_terminated_length": 547.8095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.1824, "grad_norm": 0.009774522855877876, "learning_rate": 8.055555555555557e-07, "loss": -0.0079, "num_tokens": 39439128.0, "reward": 1.473537564277649, "reward_std": 0.22798626124858856, "rewards/accuracy_reward_step": 0.6328125, "rewards/brier_reward_group": 0.7120257616043091, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6586868762969971, "step": 171 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5456695240910434, "calib/avg_num_step_conf": 3.96484375, "calib/ece": 0.19456000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.988, "calib/gap": 0.017995861661247226, "calib/mean_conf": 0.9825600000000001, "calib/mu_c": 0.9862311557788944, "calib/mu_w": 0.9682352941176472, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.19056000000000003, "calib/std_conf": 0.08078518676094028, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2261.0, "completions/max_terminated_length": 2261.0, "completions/mean_length": 526.01953125, "completions/mean_terminated_length": 536.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.18346666666666667, "grad_norm": 0.013370024040341377, "learning_rate": 7.777777777777779e-07, "loss": -0.0149, "num_tokens": 39677141.0, "reward": 1.6798417568206787, "reward_std": 0.22582019865512848, "rewards/accuracy_reward_step": 0.77734375, "rewards/brier_reward_group": 0.845207154750824, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8116599321365356, "step": 172 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.3976315996537886, "calib/avg_num_step_conf": 4.04296875, "calib/ece": 0.2795600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.996, "calib/gap": -0.00753875206546506, "calib/mean_conf": 0.9875600000000001, "calib/mu_c": 0.9854189944134077, "calib/mu_w": 0.9929577464788728, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2755600000000001, "calib/std_conf": 0.06268689177172529, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2675.0, "completions/max_terminated_length": 2675.0, "completions/mean_length": 570.3125, "completions/mean_terminated_length": 588.7096557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.18453333333333333, "grad_norm": 0.008059374988079071, "learning_rate": 7.5e-07, "loss": -0.036, "num_tokens": 39926301.0, "reward": 1.5564550161361694, "reward_std": 0.21988022327423096, "rewards/accuracy_reward_step": 0.69921875, "rewards/brier_reward_group": 0.7573325037956238, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7262998819351196, "step": 173 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4575250836120401, "calib/avg_num_step_conf": 4.109375, "calib/ece": 0.2517670682730925, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0006872909698998342, "calib/mean_conf": 0.9907228915662651, "calib/mu_c": 0.9905434782608695, "calib/mu_w": 0.9912307692307694, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2517670682730925, "calib/std_conf": 0.003019268454491971, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2415.0, "completions/max_terminated_length": 2415.0, "completions/mean_length": 587.125, "completions/mean_terminated_length": 603.6304931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 243.0, "epoch": 0.1856, "grad_norm": 0.027610663324594498, "learning_rate": 7.222222222222222e-07, "loss": 0.012, "num_tokens": 40180837.0, "reward": 1.593778133392334, "reward_std": 0.3387395739555359, "rewards/accuracy_reward_step": 0.71875, "rewards/brier_reward_group": 0.810280978679657, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7445189952850342, "step": 174 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5908522087348267, "calib/avg_num_step_conf": 4.2421875, "calib/ece": 0.5054150197628459, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9881422924901185, "calib/gap": 0.009105243398823792, "calib/mean_conf": 0.9800395256916996, "calib/mu_c": 0.9847540983606559, "calib/mu_w": 0.9756488549618321, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.5016205533596838, "calib/std_conf": 0.10598119761540901, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2777.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 652.21484375, "completions/mean_terminated_length": 657.3504028320312, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.18666666666666668, "grad_norm": 0.009824872948229313, "learning_rate": 6.944444444444446e-07, "loss": 0.0016, "num_tokens": 40453628.0, "reward": 1.263087272644043, "reward_std": 0.2979387640953064, "rewards/accuracy_reward_step": 0.4765625, "rewards/brier_reward_group": 0.578266978263855, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.5990820527076721, "step": 175 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5172768553994607, "calib/avg_num_step_conf": 5.375, "calib/ece": 0.3167843137254903, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9921568627450981, "calib/gap": -0.010815240527884407, "calib/mean_conf": 0.9836078431372549, "calib/mu_c": 0.9801724137931032, "calib/mu_w": 0.9909876543209876, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30901960784313737, "calib/std_conf": 0.0866337973336387, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1559.0, "completions/max_terminated_length": 1559.0, "completions/mean_length": 554.05859375, "completions/mean_terminated_length": 562.8532104492188, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.18773333333333334, "grad_norm": 0.01098695769906044, "learning_rate": 6.666666666666667e-07, "loss": -0.0266, "num_tokens": 40699531.0, "reward": 1.558232307434082, "reward_std": 0.19351378083229065, "rewards/accuracy_reward_step": 0.6796875, "rewards/brier_reward_group": 0.7470316290855408, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7749598026275635, "step": 176 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5670391061452514, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.28622047244094484, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0013407821229052264, "calib/mean_conf": 0.9909448818897637, "calib/mu_c": 0.991340782122905, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28622047244094484, "calib/std_conf": 0.0029250670269301933, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1611.0, "completions/max_terminated_length": 1611.0, "completions/mean_length": 586.91796875, "completions/mean_terminated_length": 596.2341918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.1888, "grad_norm": 0.009471280500292778, "learning_rate": 6.388888888888889e-07, "loss": -0.0103, "num_tokens": 40953614.0, "reward": 1.5689765214920044, "reward_std": 0.17845270037651062, "rewards/accuracy_reward_step": 0.69921875, "rewards/brier_reward_group": 0.7458236217498779, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7566449046134949, "step": 177 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5126967626967627, "calib/avg_num_step_conf": 6.26953125, "calib/ece": 0.2796484375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00025393525393524463, "calib/mean_conf": 0.9905859375000001, "calib/mu_c": 0.9906593406593406, "calib/mu_w": 0.9904054054054053, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2796484375000001, "calib/std_conf": 0.0023486277368058484, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1399.0, "completions/max_terminated_length": 1399.0, "completions/mean_length": 608.63671875, "completions/mean_terminated_length": 618.2976684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 241.0, "epoch": 0.18986666666666666, "grad_norm": 0.00890075508505106, "learning_rate": 6.111111111111112e-07, "loss": 0.0136, "num_tokens": 41215497.0, "reward": 1.6032111644744873, "reward_std": 0.23842912912368774, "rewards/accuracy_reward_step": 0.7109375, "rewards/brier_reward_group": 0.7975425720214844, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7715519666671753, "step": 178 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.48202378806992247, "calib/avg_num_step_conf": 11.4453125, "calib/ece": 0.24780082987551877, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00035952423860197147, "calib/mean_conf": 0.990539419087137, "calib/mu_c": 0.9904469273743015, "calib/mu_w": 0.9908064516129035, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.24780082987551877, "calib/std_conf": 0.0022590303051977117, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 624.6953125, "completions/mean_terminated_length": 655.41796875, "completions/min_length": 0.0, "completions/min_terminated_length": 281.0, "epoch": 0.19093333333333334, "grad_norm": 0.01979055441915989, "learning_rate": 5.833333333333334e-07, "loss": -0.047, "num_tokens": 41481683.0, "reward": 1.5406606197357178, "reward_std": 0.3186691403388977, "rewards/accuracy_reward_step": 0.69921875, "rewards/brier_reward_group": 0.7571793794631958, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.7335880994796753, "step": 179 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.49709081710093606, "calib/avg_num_step_conf": 14.3984375, "calib/ece": 0.2649180327868852, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0002462264946456738, "calib/mean_conf": 0.9903278688524589, "calib/mu_c": 0.9903954802259888, "calib/mu_w": 0.9901492537313431, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2649180327868852, "calib/std_conf": 0.0028441559955569655, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2981.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 738.76171875, "completions/mean_terminated_length": 775.09423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 307.0, "epoch": 0.192, "grad_norm": 0.008890091441571712, "learning_rate": 5.555555555555555e-07, "loss": -0.099, "num_tokens": 41774662.0, "reward": 1.5315313339233398, "reward_std": 0.30622541904449463, "rewards/accuracy_reward_step": 0.69140625, "rewards/brier_reward_group": 0.7310481667518616, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7232024073600769, "step": 180 }, { "calib/answer_extract_rate": 0.86328125, "calib/auroc": 0.5106624319419237, "calib/avg_num_step_conf": 24.91015625, "calib/ece": 0.3341628959276016, "calib/final_conf_rate": 0.86328125, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00021324863883842582, "calib/mean_conf": 0.9902714932126695, "calib/mu_c": 0.9903448275862067, "calib/mu_w": 0.9901315789473683, "calib/nonempty_final_conf_rate": 0.86328125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3341628959276016, "calib/std_conf": 0.0016251841625400892, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 638.71875, "completions/mean_terminated_length": 729.9642944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.19306666666666666, "grad_norm": 0.01944536156952381, "learning_rate": 5.277777777777779e-07, "loss": -0.1366, "num_tokens": 42044438.0, "reward": 1.3028833866119385, "reward_std": 0.4944838881492615, "rewards/accuracy_reward_step": 0.56640625, "rewards/brier_reward_group": 0.6211562156677246, "rewards/format_reward_step": 0.86328125, "rewards/stepwise_brier_reward": 0.5981900095939636, "step": 181 }, { "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.5105828720286552, "calib/avg_num_step_conf": 16.91796875, "calib/ece": 0.29875000000000007, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0002116574405728322, "calib/mean_conf": 0.9904166666666667, "calib/mu_c": 0.9904819277108431, "calib/mu_w": 0.9902702702702703, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29875000000000007, "calib/std_conf": 0.001998263134713635, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 684.17578125, "completions/mean_terminated_length": 726.7593994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 322.0, "epoch": 0.19413333333333332, "grad_norm": 0.009764508344233036, "learning_rate": 5.000000000000001e-07, "loss": -0.0896, "num_tokens": 42325747.0, "reward": 1.45506751537323, "reward_std": 0.37210720777511597, "rewards/accuracy_reward_step": 0.6484375, "rewards/brier_reward_group": 0.6974618434906006, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.6618707180023193, "step": 182 }, { "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.50920245398773, "calib/avg_num_step_conf": 19.65625, "calib/ece": 0.28450216450216437, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00018404907975455576, "calib/mean_conf": 0.99012987012987, "calib/mu_c": 0.9901840490797544, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.28450216450216437, "calib/std_conf": 0.0011321815437767995, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 748.1171875, "completions/mean_terminated_length": 814.97021484375, "completions/min_length": 0.0, "completions/min_terminated_length": 295.0, "epoch": 0.1952, "grad_norm": 0.012072579003870487, "learning_rate": 4.7222222222222226e-07, "loss": -0.0901, "num_tokens": 42623945.0, "reward": 1.4325345754623413, "reward_std": 0.40637436509132385, "rewards/accuracy_reward_step": 0.63671875, "rewards/brier_reward_group": 0.7000696063041687, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.678506076335907, "step": 183 }, { "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.5168248490077654, "calib/avg_num_step_conf": 17.6015625, "calib/ece": 0.2529741379310346, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005033074489499967, "calib/mean_conf": 0.990043103448276, "calib/mu_c": 0.9901754385964912, "calib/mu_w": 0.9896721311475412, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2529741379310346, "calib/std_conf": 0.0017364859550416988, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2869.0, "completions/max_terminated_length": 2869.0, "completions/mean_length": 645.7421875, "completions/mean_terminated_length": 703.44677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 274.0, "epoch": 0.19626666666666667, "grad_norm": 0.008652531541883945, "learning_rate": 4.444444444444445e-07, "loss": -0.0473, "num_tokens": 42894535.0, "reward": 1.4711438417434692, "reward_std": 0.3274310827255249, "rewards/accuracy_reward_step": 0.66796875, "rewards/brier_reward_group": 0.7086145281791687, "rewards/format_reward_step": 0.90234375, "rewards/stepwise_brier_reward": 0.6993983387947083, "step": 184 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5026881720430108, "calib/avg_num_step_conf": 9.4609375, "calib/ece": 0.23394308943089426, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.3763440860055134e-05, "calib/mean_conf": 0.990040650406504, "calib/mu_c": 0.9900537634408603, "calib/mu_w": 0.9900000000000002, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.23394308943089426, "calib/std_conf": 0.0006362795057926236, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2519.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 707.58984375, "completions/mean_terminated_length": 721.685302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 357.0, "epoch": 0.19733333333333333, "grad_norm": 0.01276659406721592, "learning_rate": 4.1666666666666667e-07, "loss": 0.0043, "num_tokens": 43182598.0, "reward": 1.5871763229370117, "reward_std": 0.23624706268310547, "rewards/accuracy_reward_step": 0.7265625, "rewards/brier_reward_group": 0.770573616027832, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.7578195333480835, "step": 185 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5110367892976588, "calib/avg_num_step_conf": 8.66796875, "calib/ece": 0.24742971887550208, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9959839357429718, "calib/gap": 0.015303511705685668, "calib/mean_conf": 0.9863855421686748, "calib/mu_c": 0.9903804347826088, "calib/mu_w": 0.9750769230769232, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.24742971887550208, "calib/std_conf": 0.06266334439471093, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 668.296875, "completions/mean_terminated_length": 684.3360595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 263.0, "epoch": 0.1984, "grad_norm": 0.00998939573764801, "learning_rate": 3.8888888888888895e-07, "loss": -0.034, "num_tokens": 43458722.0, "reward": 1.5860662460327148, "reward_std": 0.20728999376296997, "rewards/accuracy_reward_step": 0.71875, "rewards/brier_reward_group": 0.7621311545372009, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.7696335315704346, "step": 186 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4972781065088757, "calib/avg_num_step_conf": 7.81640625, "calib/ece": 0.2976639344262294, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00022169625246559743, "calib/mean_conf": 0.9902868852459016, "calib/mu_c": 0.9903550295857988, "calib/mu_w": 0.9901333333333332, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2976639344262294, "calib/std_conf": 0.002775712899704437, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 687.0625, "completions/mean_terminated_length": 714.9918212890625, "completions/min_length": 0.0, "completions/min_terminated_length": 239.0, "epoch": 0.19946666666666665, "grad_norm": 0.017793305218219757, "learning_rate": 3.611111111111111e-07, "loss": -0.0418, "num_tokens": 43736154.0, "reward": 1.5049433708190918, "reward_std": 0.37476426362991333, "rewards/accuracy_reward_step": 0.66015625, "rewards/brier_reward_group": 0.7644026279449463, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.71630859375, "step": 187 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5325, "calib/avg_num_step_conf": 6.7265625, "calib/ece": 0.20619607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006499999999997064, "calib/mean_conf": 0.9905098039215686, "calib/mu_c": 0.99065, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20619607843137255, "calib/std_conf": 0.0021995770450792423, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 664.03515625, "completions/mean_terminated_length": 674.575439453125, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.20053333333333334, "grad_norm": 0.008216816000640392, "learning_rate": 3.3333333333333335e-07, "loss": -0.0195, "num_tokens": 44010219.0, "reward": 1.695479393005371, "reward_std": 0.163077250123024, "rewards/accuracy_reward_step": 0.78125, "rewards/brier_reward_group": 0.8345831632614136, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8301471471786499, "step": 188 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5093599033816425, "calib/avg_num_step_conf": 6.8515625, "calib/ece": 0.2715234375000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0001871980676328988, "calib/mean_conf": 0.9902734375000001, "calib/mu_c": 0.9903260869565217, "calib/mu_w": 0.9901388888888888, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2715234375000001, "calib/std_conf": 0.001630830136339697, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1543.0, "completions/max_terminated_length": 1543.0, "completions/mean_length": 620.70703125, "completions/mean_terminated_length": 630.5595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.2016, "grad_norm": 0.009894832968711853, "learning_rate": 3.055555555555556e-07, "loss": -0.0149, "num_tokens": 44276888.0, "reward": 1.6106594800949097, "reward_std": 0.2062380313873291, "rewards/accuracy_reward_step": 0.71875, "rewards/brier_reward_group": 0.7946687340736389, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7807815670967102, "step": 189 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5088156230234029, "calib/avg_num_step_conf": 6.4296875, "calib/ece": 0.26196850393700793, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": -0.005039531941808617, "calib/mean_conf": 0.9864566929133859, "calib/mu_c": 0.9851075268817205, "calib/mu_w": 0.9901470588235292, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25807086614173236, "calib/std_conf": 0.061414142861346575, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2453.0, "completions/max_terminated_length": 2453.0, "completions/mean_length": 666.02734375, "completions/mean_terminated_length": 676.5992431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 260.0, "epoch": 0.20266666666666666, "grad_norm": 0.006808450445532799, "learning_rate": 2.7777777777777776e-07, "loss": -0.0015, "num_tokens": 44552999.0, "reward": 1.604135274887085, "reward_std": 0.16429871320724487, "rewards/accuracy_reward_step": 0.7265625, "rewards/brier_reward_group": 0.7767007350921631, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7492154836654663, "step": 190 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49062078272604587, "calib/avg_num_step_conf": 7.20703125, "calib/ece": 0.3686852589641435, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00018758434547894165, "calib/mean_conf": 0.9901992031872511, "calib/mu_c": 0.990128205128205, "calib/mu_w": 0.9903157894736839, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3686852589641435, "calib/std_conf": 0.001397265172649418, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2606.0, "completions/max_terminated_length": 2606.0, "completions/mean_length": 602.3828125, "completions/mean_terminated_length": 621.8145141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 239.0, "epoch": 0.20373333333333332, "grad_norm": 0.008911381475627422, "learning_rate": 2.5000000000000004e-07, "loss": -0.0611, "num_tokens": 44811377.0, "reward": 1.429365634918213, "reward_std": 0.17169693112373352, "rewards/accuracy_reward_step": 0.609375, "rewards/brier_reward_group": 0.6546695232391357, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6643551588058472, "step": 191 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4935262148337596, "calib/avg_num_step_conf": 6.38671875, "calib/ece": 0.26404761904761914, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.996031746031746, "calib/gap": -0.005402813299232423, "calib/mean_conf": 0.9863492063492063, "calib/mu_c": 0.9848913043478262, "calib/mu_w": 0.9902941176470587, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2601190476190477, "calib/std_conf": 0.06164546835908856, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2522.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 610.15625, "completions/mean_terminated_length": 622.310791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.2048, "grad_norm": 0.010867852717638016, "learning_rate": 2.2222222222222224e-07, "loss": 0.0018, "num_tokens": 45072553.0, "reward": 1.5917949676513672, "reward_std": 0.21750152111053467, "rewards/accuracy_reward_step": 0.71875, "rewards/brier_reward_group": 0.7597798109054565, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7714629173278809, "step": 192 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5056060606060606, "calib/avg_num_step_conf": 6.703125, "calib/ece": 0.28501992031872525, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9960159362549801, "calib/gap": 0.013046969696969835, "calib/mean_conf": 0.9862151394422312, "calib/mu_c": 0.9901136363636364, "calib/mu_w": 0.9770666666666665, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.28501992031872525, "calib/std_conf": 0.06175082890454899, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2279.0, "completions/max_terminated_length": 2279.0, "completions/mean_length": 577.17578125, "completions/mean_terminated_length": 591.0280151367188, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.20586666666666667, "grad_norm": 0.010334227234125137, "learning_rate": 1.9444444444444447e-07, "loss": -0.0436, "num_tokens": 45326022.0, "reward": 1.5489764213562012, "reward_std": 0.2321387529373169, "rewards/accuracy_reward_step": 0.6875, "rewards/brier_reward_group": 0.7586432695388794, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7263247966766357, "step": 193 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5377777777777778, "calib/avg_num_step_conf": 6.609375, "calib/ece": 0.29894531249999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9921875, "calib/gap": 0.024844444444444447, "calib/mean_conf": 0.9825390625000001, "calib/mu_c": 0.9904, "calib/mu_w": 0.9655555555555555, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29894531249999995, "calib/std_conf": 0.08632355580385398, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 586.3515625, "completions/mean_terminated_length": 595.6587524414062, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.20693333333333333, "grad_norm": 0.031296394765377045, "learning_rate": 1.6666666666666668e-07, "loss": -0.0056, "num_tokens": 45582072.0, "reward": 1.5580151081085205, "reward_std": 0.22170788049697876, "rewards/accuracy_reward_step": 0.68359375, "rewards/brier_reward_group": 0.7631152868270874, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7423826456069946, "step": 194 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5213414634146342, "calib/avg_num_step_conf": 6.30078125, "calib/ece": 0.339484126984127, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00042682926829240575, "calib/mean_conf": 0.9902777777777778, "calib/mu_c": 0.9904268292682925, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.339484126984127, "calib/std_conf": 0.0016433554953054499, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 600.27734375, "completions/mean_terminated_length": 617.152587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.208, "grad_norm": 0.010016894899308681, "learning_rate": 1.3888888888888888e-07, "loss": -0.0405, "num_tokens": 45841727.0, "reward": 1.479560136795044, "reward_std": 0.24249720573425293, "rewards/accuracy_reward_step": 0.640625, "rewards/brier_reward_group": 0.7105613946914673, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6842419505119324, "step": 195 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5053896249719291, "calib/avg_num_step_conf": 6.16015625, "calib/ece": 0.2715624999999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.012986001946253545, "calib/mean_conf": 0.9864062500000002, "calib/mu_c": 0.9901092896174863, "calib/mu_w": 0.9771232876712328, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2715624999999998, "calib/std_conf": 0.059276027708825944, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1276.0, "completions/max_terminated_length": 1276.0, "completions/mean_length": 522.046875, "completions/mean_terminated_length": 530.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.20906666666666668, "grad_norm": 0.014148207381367683, "learning_rate": 1.1111111111111112e-07, "loss": -0.0091, "num_tokens": 46077915.0, "reward": 1.6046156883239746, "reward_std": 0.1983228623867035, "rewards/accuracy_reward_step": 0.71484375, "rewards/brier_reward_group": 0.7881472706794739, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.770940899848938, "step": 196 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4980625161456988, "calib/avg_num_step_conf": 6.1171875, "calib/ece": 0.37289062500000014, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -3.87496770861695e-05, "calib/mean_conf": 0.9900781250000001, "calib/mu_c": 0.9900632911392404, "calib/mu_w": 0.9901020408163266, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37289062500000014, "calib/std_conf": 0.0008804240366863013, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 580.67578125, "completions/mean_terminated_length": 589.8928833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 251.0, "epoch": 0.21013333333333334, "grad_norm": 0.01780375838279724, "learning_rate": 8.333333333333334e-08, "loss": 0.0089, "num_tokens": 46331624.0, "reward": 1.4571163654327393, "reward_std": 0.21886040270328522, "rewards/accuracy_reward_step": 0.6171875, "rewards/brier_reward_group": 0.6930257678031921, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6666896343231201, "step": 197 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49707622694048037, "calib/avg_num_step_conf": 6.0625, "calib/ece": 0.324724409448819, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -5.847546119019942e-05, "calib/mean_conf": 0.9900787401574804, "calib/mu_c": 0.9900591715976331, "calib/mu_w": 0.9901176470588233, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.324724409448819, "calib/std_conf": 0.0008838560756158927, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1807.0, "completions/max_terminated_length": 1807.0, "completions/mean_length": 554.56640625, "completions/mean_terminated_length": 563.3690795898438, "completions/min_length": 0.0, "completions/min_terminated_length": 234.0, "epoch": 0.2112, "grad_norm": 0.008209271356463432, "learning_rate": 5.555555555555556e-08, "loss": -0.0367, "num_tokens": 46578977.0, "reward": 1.529144287109375, "reward_std": 0.24424126744270325, "rewards/accuracy_reward_step": 0.66015625, "rewards/brier_reward_group": 0.739728569984436, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7596610188484192, "step": 198 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49287217757367013, "calib/avg_num_step_conf": 5.9375, "calib/ece": 0.19561264822134394, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00014255644852667615, "calib/mean_conf": 0.9900790513833992, "calib/mu_c": 0.9900497512437811, "calib/mu_w": 0.9901923076923078, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19561264822134394, "calib/std_conf": 0.0008855872135339169, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2442.0, "completions/max_terminated_length": 2442.0, "completions/mean_length": 619.68359375, "completions/mean_terminated_length": 632.0278930664062, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.21226666666666666, "grad_norm": 0.02195962518453598, "learning_rate": 2.777777777777778e-08, "loss": 0.0067, "num_tokens": 46841816.0, "reward": 1.6957043409347534, "reward_std": 0.17761465907096863, "rewards/accuracy_reward_step": 0.78515625, "rewards/brier_reward_group": 0.8301656246185303, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8354641795158386, "step": 199 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5145348837209303, "calib/avg_num_step_conf": 6.203125, "calib/ece": 0.3156862745098039, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00029069767441858296, "calib/mean_conf": 0.9901960784313726, "calib/mu_c": 0.9902906976744185, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3156862745098039, "calib/std_conf": 0.001386483884679506, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1513.0, "completions/max_terminated_length": 1513.0, "completions/mean_length": 619.43359375, "completions/mean_terminated_length": 629.2659301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.21333333333333335, "grad_norm": 0.008899247273802757, "learning_rate": 0.0, "loss": -0.005, "num_tokens": 47108439.0, "reward": 1.5375908613204956, "reward_std": 0.1821175068616867, "rewards/accuracy_reward_step": 0.671875, "rewards/brier_reward_group": 0.7398461699485779, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7308297157287598, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": -0.005905325598432682, "train_runtime": 13072.8321, "train_samples_per_second": 3.917, "train_steps_per_second": 0.015 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 47108439, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }