{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.01171875, "calib/ece": 0.6500000000000001, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.025000000000000022, "calib/mean_conf": 0.9833333333333334, "calib/mu_c": 1.0, "calib/mu_w": 0.975, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.6500000000000001, "calib/std_conf": 0.023570226039551608, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 695.9765625, "completions/mean_terminated_length": 748.6134643554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.0008491715998388827, "learning_rate": 2.5000000000000004e-07, "loss": 0.0006, "num_tokens": 235322.0, "reward": 0.006670608185231686, "reward_std": 0.018867328763008118, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0016824332997202873, "step": 1 }, { "calib/answer_extract_rate": 0.05078125, "calib/avg_num_step_conf": 0.04296875, "calib/ece": 1.0, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": NaN, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 1.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 644.19140625, "completions/mean_terminated_length": 717.0130004882812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.0009866019245237112, "learning_rate": 5.000000000000001e-07, "loss": 0.0083, "num_tokens": 453091.0, "reward": 0.001361193018965423, "reward_std": 0.0038500353693962097, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0038822719361633062, "step": 2 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.01171875, "calib/ece": 1.0, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": NaN, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 1.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 710.796875, "completions/mean_terminated_length": 815.9821166992188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.0018382910639047623, "learning_rate": 7.5e-07, "loss": -0.0051, "num_tokens": 689879.0, "reward": 0.0013669448671862483, "reward_std": 0.003866303712129593, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0039052795618772507, "step": 3 }, { "calib/answer_extract_rate": 0.0234375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.6666666666666667, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 1.0, "calib/mu_c": 1.0, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.6666666666666667, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 687.3828125, "completions/mean_terminated_length": 752.0086059570312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.001273542526178062, "learning_rate": 1.0000000000000002e-06, "loss": -0.0006, "num_tokens": 921585.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 4 }, { "calib/answer_extract_rate": 0.0390625, "calib/avg_num_step_conf": 0.10546875, "calib/ece": 0.5933333333333333, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/mean_conf": 0.5933333333333334, "calib/mu_c": NaN, "calib/mu_w": 0.5933333333333334, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.5933333333333333, "calib/std_conf": 0.4112041936664665, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 3046.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 727.7109375, "completions/mean_terminated_length": 839.1621704101562, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.005333333333333333, "grad_norm": 0.0022040223702788353, "learning_rate": 1.25e-06, "loss": -0.0072, "num_tokens": 1164135.0, "reward": 0.004656470380723476, "reward_std": 0.013170486316084862, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0039027344901114702, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.007695412263274193, "step": 5 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.05078125, "calib/ece": 1.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": NaN, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 1.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2993.0, "completions/max_terminated_length": 2993.0, "completions/mean_length": 641.921875, "completions/mean_terminated_length": 711.3939208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.0010836443398147821, "learning_rate": 1.5e-06, "loss": 0.0118, "num_tokens": 1383987.0, "reward": 0.0078125, "reward_std": 0.022097086533904076, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 6 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.4444444444444445, "calib/avg_num_step_conf": 0.171875, "calib/ece": 0.3, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.2666666666666666, "calib/mean_conf": 0.7999999999999999, "calib/mu_c": 0.9333333333333332, "calib/mu_w": 0.6666666666666666, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.3, "calib/std_conf": 0.36055512754639896, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2911.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 750.55078125, "completions/mean_terminated_length": 831.7792358398438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.0019379056757315993, "learning_rate": 1.75e-06, "loss": -0.0039, "num_tokens": 1633120.0, "reward": 0.01432965137064457, "reward_std": 0.04053037613630295, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0010686073219403625, "step": 7 }, { "calib/answer_extract_rate": 0.05859375, "calib/auroc": 1.0, "calib/avg_num_step_conf": 0.0234375, "calib/ece": 0.07200000000000001, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": 0.8200000000000001, "calib/mean_conf": 0.6719999999999999, "calib/mu_c": 1.0, "calib/mu_w": 0.18, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.07200000000000001, "calib/std_conf": 0.40191541398657504, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 2928.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 639.515625, "completions/mean_terminated_length": 718.0526123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.0021520222071558237, "learning_rate": 2.0000000000000003e-06, "loss": -0.0034, "num_tokens": 1852916.0, "reward": 0.01730281114578247, "reward_std": 0.048939742147922516, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.0078125, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.0035862457007169724, "step": 8 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.11328125, "calib/ece": 0.8, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 1.0, "calib/mu_c": 1.0, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.8, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 2987.0, "completions/max_terminated_length": 2987.0, "completions/mean_length": 724.1484375, "completions/mean_terminated_length": 823.9200439453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.0006597324390895665, "learning_rate": 2.25e-06, "loss": 0.0056, "num_tokens": 2095402.0, "reward": 0.007182005792856216, "reward_std": 0.020313778892159462, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.003728023497387767, "step": 9 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.00390625, "calib/ece": 1.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": NaN, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 1.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 663.19140625, "completions/mean_terminated_length": 751.2256469726562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.001061733695678413, "learning_rate": 2.5e-06, "loss": 0.0006, "num_tokens": 2321547.0, "reward": 0.0005708447424694896, "reward_std": 0.0016145927365869284, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0007208790048025548, "step": 10 }, { "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.0390625, "calib/ece": 0.8333333333333333, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/mean_conf": 0.8333333333333334, "calib/mu_c": NaN, "calib/mu_w": 0.8333333333333334, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.8333333333333333, "calib/std_conf": 0.23570226039551584, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 711.1015625, "completions/mean_terminated_length": 788.0606079101562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.0012463730527088046, "learning_rate": 2.7500000000000004e-06, "loss": 0.002, "num_tokens": 2557637.0, "reward": 0.003642414230853319, "reward_std": 0.010302303358912468, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0029296875, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.005585282109677792, "step": 11 }, { "calib/answer_extract_rate": 0.046875, "calib/auroc": 1.0, "calib/avg_num_step_conf": 0.09765625, "calib/ece": 0.18894999999999998, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.43315000000000003, "calib/mean_conf": 0.8556166666666667, "calib/mu_c": 1.0, "calib/mu_w": 0.56685, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.18894999999999998, "calib/std_conf": 0.2078045589543748, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 715.61328125, "completions/mean_terminated_length": 803.49560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0128, "grad_norm": 0.002766356337815523, "learning_rate": 3e-06, "loss": -0.001, "num_tokens": 2794578.0, "reward": 0.030658047646284103, "reward_std": 0.07015920430421829, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.013079782947897911, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.012097620405256748, "step": 12 }, { "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.5333333333333333, "calib/avg_num_step_conf": 0.04296875, "calib/ece": 0.4600000000000001, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.625, "calib/gap": 0.21733333333333338, "calib/mean_conf": 0.8075000000000001, "calib/mu_c": 0.9433333333333334, "calib/mu_w": 0.726, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.4462500000000001, "calib/std_conf": 0.29179401981534847, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 739.73046875, "completions/mean_terminated_length": 799.03369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.013866666666666666, "grad_norm": 0.0021508962381631136, "learning_rate": 3.2500000000000002e-06, "loss": 0.0014, "num_tokens": 3038109.0, "reward": 0.018178798258304596, "reward_std": 0.045451875776052475, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.002724609337747097, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0032034749165177345, "step": 13 }, { "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.25, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.5900000000000001, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.2, "calib/gap": -0.11250000000000004, "calib/mean_conf": 0.5900000000000001, "calib/mu_c": 0.5, "calib/mu_w": 0.6125, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.49, "calib/std_conf": 0.33526109228480416, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 647.03125, "completions/mean_terminated_length": 710.9013061523438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.0023395600728690624, "learning_rate": 3.5e-06, "loss": -0.0113, "num_tokens": 3258717.0, "reward": 0.0048194024711847305, "reward_std": 0.01363132894039154, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.002090109745040536, "step": 14 }, { "calib/answer_extract_rate": 0.05078125, "calib/avg_num_step_conf": 0.0078125, "calib/ece": 0.5, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.5, "calib/mean_conf": 0.5, "calib/mu_c": NaN, "calib/mu_w": 0.5, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.5, "calib/std_conf": 0.5, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 690.16015625, "completions/mean_terminated_length": 761.5560302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.0009692964958958328, "learning_rate": 3.7500000000000005e-06, "loss": 0.0037, "num_tokens": 3492846.0, "reward": 0.002595576224848628, "reward_std": 0.007341398391872644, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0010073042940348387, "step": 15 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.8333333333333334, "calib/avg_num_step_conf": 0.109375, "calib/ece": 0.7614285714285716, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": 0.11166666666666669, "calib/mean_conf": 0.9042857142857142, "calib/mu_c": 1.0, "calib/mu_w": 0.8883333333333333, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.7614285714285716, "calib/std_conf": 0.15989792662437965, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2768.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 714.375, "completions/mean_terminated_length": 781.5385131835938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.0022340205032378435, "learning_rate": 4.000000000000001e-06, "loss": 0.0222, "num_tokens": 3734142.0, "reward": 0.012918464839458466, "reward_std": 0.03653893619775772, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.00390390632674098, "rewards/format_reward_step": 0.01171875, "rewards/stepwise_brier_reward": 0.007928548380732536, "step": 16 }, { "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.5416666666666666, "calib/avg_num_step_conf": 0.06640625, "calib/ece": 0.52, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.42857142857142855, "calib/gap": 0.025833333333333264, "calib/mean_conf": 0.7485714285714286, "calib/mu_c": 0.7633333333333333, "calib/mu_w": 0.7375, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.42000000000000004, "calib/std_conf": 0.272576519868593, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 663.37109375, "completions/mean_terminated_length": 754.7689208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.0030176357831805944, "learning_rate": 4.25e-06, "loss": 0.0024, "num_tokens": 3957061.0, "reward": 0.02414598874747753, "reward_std": 0.06829516589641571, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01416953094303608, "rewards/format_reward_step": 0.01953125, "rewards/stepwise_brier_reward": 0.013557392172515392, "step": 17 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.19999999999999996, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.7581699346405228, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/gap": -0.4431372549019608, "calib/mean_conf": 0.7026143790849674, "calib/mu_c": 0.3333333333333333, "calib/mu_w": 0.7764705882352941, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.6470588235294117, "calib/std_conf": 0.35300047916212857, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2873.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 797.18359375, "completions/mean_terminated_length": 861.0927734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.001272398978471756, "learning_rate": 4.5e-06, "loss": 0.0018, "num_tokens": 4221428.0, "reward": 0.011318582110106945, "reward_std": 0.03201378509402275, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0069023785181343555, "rewards/format_reward_step": 0.01171875, "rewards/stepwise_brier_reward": 0.011157071217894554, "step": 18 }, { "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.59375, "calib/avg_num_step_conf": 0.12109375, "calib/ece": 0.76, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.018749999999999933, "calib/mean_conf": 0.96, "calib/mu_c": 0.975, "calib/mu_w": 0.95625, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.76, "calib/std_conf": 0.0521536192416212, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2946.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 641.44921875, "completions/mean_terminated_length": 701.7564697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.020266666666666665, "grad_norm": 0.002642231062054634, "learning_rate": 4.75e-06, "loss": -0.0041, "num_tokens": 4439967.0, "reward": 0.021389827132225037, "reward_std": 0.060499563813209534, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.004428906366229057, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.007951493375003338, "step": 19 }, { "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.5636363636363636, "calib/avg_num_step_conf": 0.171875, "calib/ece": 0.5309686875, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6875, "calib/gap": 0.16950009090909102, "calib/mean_conf": 0.7934686875, "calib/mu_c": 0.9100000000000001, "calib/mu_w": 0.7404999090909091, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.5059686875, "calib/std_conf": 0.34650823595131164, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 664.63671875, "completions/mean_terminated_length": 724.02978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.0028716290835291147, "learning_rate": 5e-06, "loss": 0.012, "num_tokens": 4664554.0, "reward": 0.03895442187786102, "reward_std": 0.09528942406177521, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.016669921576976776, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.028727836906909943, "step": 20 }, { "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.11111111111111116, "calib/avg_num_step_conf": 0.0703125, "calib/ece": 0.8533333333333333, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.7, "calib/gap": -0.3925925925925926, "calib/mean_conf": 0.8533333333333333, "calib/mu_c": 0.5, "calib/mu_w": 0.8925925925925926, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.8033333333333332, "calib/std_conf": 0.2386070689089771, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 706.3203125, "completions/mean_terminated_length": 776.0429077148438, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.0224, "grad_norm": 0.0019160123774781823, "learning_rate": 4.9722222222222224e-06, "loss": -0.0088, "num_tokens": 4897900.0, "reward": 0.01568451151251793, "reward_std": 0.034691423177719116, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.005464409478008747, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.014309222809970379, "step": 21 }, { "calib/answer_extract_rate": 0.125, "calib/auroc": 0.5423076923076923, "calib/avg_num_step_conf": 0.34375, "calib/ece": 0.5052173913043478, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.5217391304347826, "calib/gap": -0.04792307692307696, "calib/mean_conf": 0.8260869565217392, "calib/mu_c": 0.7989999999999999, "calib/mu_w": 0.8469230769230769, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.16015625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.4482608695652174, "calib/std_conf": 0.2418720310310497, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 691.10546875, "completions/mean_terminated_length": 749.6737060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.023466666666666667, "grad_norm": 0.0035497175995260477, "learning_rate": 4.944444444444445e-06, "loss": 0.0433, "num_tokens": 5126207.0, "reward": 0.06569827347993851, "reward_std": 0.15479235351085663, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.02086445316672325, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.033564187586307526, "step": 22 }, { "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.41000000000000003, "calib/avg_num_step_conf": 0.12109375, "calib/ece": 0.71024, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": -0.08570000000000011, "calib/mean_conf": 0.8265600000000001, "calib/mu_c": 0.758, "calib/mu_w": 0.8437000000000001, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6684, "calib/std_conf": 0.2827903930475716, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 651.21875, "completions/mean_terminated_length": 700.4706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.024533333333333334, "grad_norm": 0.0025646642316132784, "learning_rate": 4.9166666666666665e-06, "loss": 0.0179, "num_tokens": 5346423.0, "reward": 0.0344095416367054, "reward_std": 0.08778439462184906, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.010654687881469727, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.013203799724578857, "step": 23 }, { "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.6173469387755102, "calib/avg_num_step_conf": 0.43359375, "calib/ece": 0.6334669523809523, "calib/final_conf_rate": 0.13671875, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.5428571428571428, "calib/gap": 0.12780916666666686, "calib/mean_conf": 0.8334669523809523, "calib/mu_c": 0.9357142857142857, "calib/mu_w": 0.8079051190476189, "calib/nonempty_final_conf_rate": 0.13671875, "calib/nonempty_reasoning_rate": 0.21875, "calib/nonempty_step_conf_rate": 0.140625, "calib/pce": 0.6334669523809523, "calib/std_conf": 0.2826246802649036, "calib/step_conf_rate": 0.140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 663.66796875, "completions/mean_terminated_length": 729.1802368164062, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.0256, "grad_norm": 0.003742236876860261, "learning_rate": 4.888888888888889e-06, "loss": 0.0156, "num_tokens": 5570402.0, "reward": 0.07192394882440567, "reward_std": 0.15089201927185059, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.031522128731012344, "rewards/format_reward_step": 0.0703125, "rewards/stepwise_brier_reward": 0.04027654230594635, "step": 24 }, { "calib/answer_extract_rate": 0.30859375, "calib/auroc": 0.5073198198198198, "calib/avg_num_step_conf": 0.6171875, "calib/ece": 0.5209515893442624, "calib/final_conf_rate": 0.23828125, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.6065573770491803, "calib/gap": 0.03322846081081088, "calib/mean_conf": 0.8173450319672131, "calib/mu_c": 0.8375, "calib/mu_w": 0.8042715391891891, "calib/nonempty_final_conf_rate": 0.23828125, "calib/nonempty_reasoning_rate": 0.33203125, "calib/nonempty_step_conf_rate": 0.2109375, "calib/pce": 0.47242699918032793, "calib/std_conf": 0.31910268544969794, "calib/step_conf_rate": 0.2109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3015.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 654.21875, "completions/mean_terminated_length": 692.0661010742188, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.02666666666666667, "grad_norm": 0.00479081878438592, "learning_rate": 4.861111111111111e-06, "loss": 0.0453, "num_tokens": 5790674.0, "reward": 0.17957602441310883, "reward_std": 0.29742905497550964, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.07784279435873032, "rewards/format_reward_step": 0.16015625, "rewards/stepwise_brier_reward": 0.09230600297451019, "step": 25 }, { "calib/answer_extract_rate": 0.29296875, "calib/auroc": 0.6555851063829787, "calib/avg_num_step_conf": 0.8671875, "calib/ece": 0.5797640692640693, "calib/final_conf_rate": 0.24609375, "calib/format_rate": 0.12890625, "calib/frac_conf_gt_0.9": 0.6349206349206349, "calib/gap": 0.12377369439071562, "calib/mean_conf": 0.7851608946608948, "calib/mu_c": 0.8775, "calib/mu_w": 0.7537263056092843, "calib/nonempty_final_conf_rate": 0.24609375, "calib/nonempty_reasoning_rate": 0.34765625, "calib/nonempty_step_conf_rate": 0.2109375, "calib/pce": 0.5554783549783551, "calib/std_conf": 0.3259539828550834, "calib/step_conf_rate": 0.2109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 664.82421875, "completions/mean_terminated_length": 703.2850952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.027733333333333332, "grad_norm": 0.0048740701749920845, "learning_rate": 4.833333333333333e-06, "loss": 0.0664, "num_tokens": 6015677.0, "reward": 0.1445264220237732, "reward_std": 0.300734281539917, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.057161130011081696, "rewards/format_reward_step": 0.12890625, "rewards/stepwise_brier_reward": 0.09972096979618073, "step": 26 }, { "calib/answer_extract_rate": 0.41015625, "calib/auroc": 0.5616554054054055, "calib/avg_num_step_conf": 0.71484375, "calib/ece": 0.6403833622462044, "calib/final_conf_rate": 0.3515625, "calib/format_rate": 0.2109375, "calib/frac_conf_gt_0.9": 0.5666666666666667, "calib/gap": 0.09493070807894066, "calib/mean_conf": 0.8003833622462044, "calib/mu_c": 0.8784375, "calib/mu_w": 0.7835067919210593, "calib/nonempty_final_conf_rate": 0.3515625, "calib/nonempty_reasoning_rate": 0.4453125, "calib/nonempty_step_conf_rate": 0.28515625, "calib/pce": 0.6314944733573155, "calib/std_conf": 0.3002138648992004, "calib/step_conf_rate": 0.28515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 554.75, "completions/mean_terminated_length": 591.7333374023438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0288, "grad_norm": 0.0055619413033127785, "learning_rate": 4.805555555555556e-06, "loss": 0.0146, "num_tokens": 6212477.0, "reward": 0.1599256843328476, "reward_std": 0.2821913957595825, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.08129120618104935, "rewards/format_reward_step": 0.2109375, "rewards/stepwise_brier_reward": 0.12712031602859497, "step": 27 }, { "calib/answer_extract_rate": 0.375, "calib/auroc": 0.6194379391100702, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.5206668539325843, "calib/final_conf_rate": 0.34765625, "calib/format_rate": 0.19921875, "calib/frac_conf_gt_0.9": 0.5617977528089888, "calib/gap": 0.13378070843091328, "calib/mean_conf": 0.7948522471910112, "calib/mu_c": 0.8865446428571427, "calib/mu_w": 0.7527639344262295, "calib/nonempty_final_conf_rate": 0.34765625, "calib/nonempty_reasoning_rate": 0.4453125, "calib/nonempty_step_conf_rate": 0.30078125, "calib/pce": 0.5004561797752809, "calib/std_conf": 0.3285569415111707, "calib/step_conf_rate": 0.30078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 676.78515625, "completions/mean_terminated_length": 698.616943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.029866666666666666, "grad_norm": 0.0048601808957755566, "learning_rate": 4.777777777777778e-06, "loss": 0.0558, "num_tokens": 6442246.0, "reward": 0.22021643817424774, "reward_std": 0.36596205830574036, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.10262969881296158, "rewards/format_reward_step": 0.19921875, "rewards/stepwise_brier_reward": 0.12716886401176453, "step": 28 }, { "calib/answer_extract_rate": 0.59765625, "calib/auroc": 0.5707516339869282, "calib/avg_num_step_conf": 1.4375, "calib/ece": 0.5525328282827525, "calib/final_conf_rate": 0.515625, "calib/format_rate": 0.2734375, "calib/frac_conf_gt_0.9": 0.5303030303030303, "calib/gap": 0.08905163398702609, "calib/mean_conf": 0.7535207070706315, "calib/mu_c": 0.8223333333333334, "calib/mu_w": 0.7332816993463073, "calib/nonempty_final_conf_rate": 0.515625, "calib/nonempty_reasoning_rate": 0.66796875, "calib/nonempty_step_conf_rate": 0.38671875, "calib/pce": 0.5393904040403283, "calib/std_conf": 0.33802872458602384, "calib/step_conf_rate": 0.38671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2407.0, "completions/max_terminated_length": 2407.0, "completions/mean_length": 568.72265625, "completions/mean_terminated_length": 582.3720092773438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.030933333333333334, "grad_norm": 0.005880790762603283, "learning_rate": 4.75e-06, "loss": 0.0012, "num_tokens": 6644535.0, "reward": 0.26979178190231323, "reward_std": 0.4047597050666809, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.1319660246372223, "rewards/format_reward_step": 0.2734375, "rewards/stepwise_brier_reward": 0.19023501873016357, "step": 29 }, { "calib/answer_extract_rate": 0.5546875, "calib/auroc": 0.5448717948717949, "calib/avg_num_step_conf": 1.671875, "calib/ece": 0.6405882666666667, "calib/final_conf_rate": 0.48828125, "calib/format_rate": 0.25390625, "calib/frac_conf_gt_0.9": 0.584, "calib/gap": 0.004297798497798655, "calib/mean_conf": 0.8063397333333333, "calib/mu_c": 0.8097435897435898, "calib/mu_w": 0.8054457912457912, "calib/nonempty_final_conf_rate": 0.48828125, "calib/nonempty_reasoning_rate": 0.63671875, "calib/nonempty_step_conf_rate": 0.37109375, "calib/pce": 0.619464, "calib/std_conf": 0.29629078106769813, "calib/step_conf_rate": 0.37109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2994.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 626.8359375, "completions/mean_terminated_length": 647.0564575195312, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.032, "grad_norm": 0.005236665718257427, "learning_rate": 4.722222222222222e-06, "loss": 0.0359, "num_tokens": 6861557.0, "reward": 0.22299976646900177, "reward_std": 0.34176692366600037, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.09989957511425018, "rewards/format_reward_step": 0.25390625, "rewards/stepwise_brier_reward": 0.15313737094402313, "step": 30 }, { "calib/answer_extract_rate": 0.671875, "calib/auroc": 0.5393345771144278, "calib/avg_num_step_conf": 2.37890625, "calib/ece": 0.6282628691983124, "calib/final_conf_rate": 0.6171875, "calib/format_rate": 0.359375, "calib/frac_conf_gt_0.9": 0.46835443037974683, "calib/gap": 0.06460031094527363, "calib/mean_conf": 0.7564248945147679, "calib/mu_c": 0.8112125, "calib/mu_w": 0.7466121890547264, "calib/nonempty_final_conf_rate": 0.6171875, "calib/nonempty_reasoning_rate": 0.78515625, "calib/nonempty_step_conf_rate": 0.515625, "calib/pce": 0.6163945147679326, "calib/std_conf": 0.3317355800368736, "calib/step_conf_rate": 0.515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 551.66015625, "completions/mean_terminated_length": 562.6494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.03306666666666667, "grad_norm": 0.00622685207054019, "learning_rate": 4.694444444444445e-06, "loss": 0.0125, "num_tokens": 7058262.0, "reward": 0.280985951423645, "reward_std": 0.4292905032634735, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.15985578298568726, "rewards/format_reward_step": 0.359375, "rewards/stepwise_brier_reward": 0.22298219799995422, "step": 31 }, { "calib/answer_extract_rate": 0.71875, "calib/auroc": 0.6274630541871921, "calib/avg_num_step_conf": 1.96484375, "calib/ece": 0.5990098619329389, "calib/final_conf_rate": 0.66015625, "calib/format_rate": 0.4296875, "calib/frac_conf_gt_0.9": 0.4970414201183432, "calib/gap": 0.14379458128078815, "calib/mean_conf": 0.7640986193293887, "calib/mu_c": 0.8832183908045977, "calib/mu_w": 0.7394238095238096, "calib/nonempty_final_conf_rate": 0.66015625, "calib/nonempty_reasoning_rate": 0.78515625, "calib/nonempty_step_conf_rate": 0.53125, "calib/pce": 0.5957554240631164, "calib/std_conf": 0.3136796244204092, "calib/step_conf_rate": 0.53125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 557.6484375, "completions/mean_terminated_length": 562.0393676757812, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.034133333333333335, "grad_norm": 0.006053155288100243, "learning_rate": 4.666666666666667e-06, "loss": 0.101, "num_tokens": 7257292.0, "reward": 0.3460780680179596, "reward_std": 0.4018341302871704, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.21058638393878937, "rewards/format_reward_step": 0.4296875, "rewards/stepwise_brier_reward": 0.29126444458961487, "step": 32 }, { "calib/answer_extract_rate": 0.796875, "calib/auroc": 0.45940170940170943, "calib/avg_num_step_conf": 2.1171875, "calib/ece": 0.6480941329690347, "calib/final_conf_rate": 0.71484375, "calib/format_rate": 0.45703125, "calib/frac_conf_gt_0.9": 0.4918032786885246, "calib/gap": -0.005150899572649581, "calib/mean_conf": 0.7671575974499089, "calib/mu_c": 0.7627666666666667, "calib/mu_w": 0.7679175662393163, "calib/nonempty_final_conf_rate": 0.71484375, "calib/nonempty_reasoning_rate": 0.84375, "calib/nonempty_step_conf_rate": 0.55078125, "calib/pce": 0.6338553734061931, "calib/std_conf": 0.3217574897598852, "calib/step_conf_rate": 0.55078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2110.0, "completions/max_terminated_length": 2110.0, "completions/mean_length": 524.734375, "completions/mean_terminated_length": 528.8661499023438, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0352, "grad_norm": 0.00645009521394968, "learning_rate": 4.638888888888889e-06, "loss": 0.0209, "num_tokens": 7448064.0, "reward": 0.31928080320358276, "reward_std": 0.4097619652748108, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.1797189563512802, "rewards/format_reward_step": 0.45703125, "rewards/stepwise_brier_reward": 0.2973727583885193, "step": 33 }, { "calib/answer_extract_rate": 0.8046875, "calib/auroc": 0.5357838364167478, "calib/avg_num_step_conf": 2.69140625, "calib/ece": 0.5988974619117597, "calib/final_conf_rate": 0.76953125, "calib/format_rate": 0.55078125, "calib/frac_conf_gt_0.9": 0.4619289340101523, "calib/gap": 0.05750012984875852, "calib/mean_conf": 0.7819600676652284, "calib/mu_c": 0.8280769230769232, "calib/mu_w": 0.7705767932281646, "calib/nonempty_final_conf_rate": 0.76953125, "calib/nonempty_reasoning_rate": 0.8671875, "calib/nonempty_step_conf_rate": 0.6484375, "calib/pce": 0.59144399321489, "calib/std_conf": 0.29237446979176485, "calib/step_conf_rate": 0.6484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2743.0, "completions/max_terminated_length": 2743.0, "completions/mean_length": 469.76953125, "completions/mean_terminated_length": 469.76953125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.03626666666666667, "grad_norm": 0.0064404685981571674, "learning_rate": 4.611111111111112e-06, "loss": 0.0049, "num_tokens": 7623005.0, "reward": 0.4331265687942505, "reward_std": 0.48528268933296204, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.23728707432746887, "rewards/format_reward_step": 0.55078125, "rewards/stepwise_brier_reward": 0.35011959075927734, "step": 34 }, { "calib/answer_extract_rate": 0.8125, "calib/auroc": 0.5960097001763669, "calib/avg_num_step_conf": 2.53125, "calib/ece": 0.5851736689478186, "calib/final_conf_rate": 0.76171875, "calib/format_rate": 0.5078125, "calib/frac_conf_gt_0.9": 0.38461538461538464, "calib/gap": 0.10365081415117605, "calib/mean_conf": 0.7077378740760236, "calib/mu_c": 0.797037037037037, "calib/mu_w": 0.6933862228858609, "calib/nonempty_final_conf_rate": 0.76171875, "calib/nonempty_reasoning_rate": 0.88671875, "calib/nonempty_step_conf_rate": 0.63671875, "calib/pce": 0.577225002281152, "calib/std_conf": 0.3280189492838859, "calib/step_conf_rate": 0.63671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2338.0, "completions/max_terminated_length": 2338.0, "completions/mean_length": 483.44140625, "completions/mean_terminated_length": 485.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.037333333333333336, "grad_norm": 0.005937372334301472, "learning_rate": 4.583333333333333e-06, "loss": 0.052, "num_tokens": 7805590.0, "reward": 0.3720160722732544, "reward_std": 0.4141312539577484, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.23158225417137146, "rewards/format_reward_step": 0.5078125, "rewards/stepwise_brier_reward": 0.3530246913433075, "step": 35 }, { "calib/answer_extract_rate": 0.87890625, "calib/auroc": 0.5105847953216375, "calib/avg_num_step_conf": 3.24609375, "calib/ece": 0.5147375565610861, "calib/final_conf_rate": 0.86328125, "calib/format_rate": 0.6875, "calib/frac_conf_gt_0.9": 0.33031674208144796, "calib/gap": 0.036539454191033105, "calib/mean_conf": 0.7180407239819006, "calib/mu_c": 0.7463133333333333, "calib/mu_w": 0.7097738791423002, "calib/nonempty_final_conf_rate": 0.86328125, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.80078125, "calib/pce": 0.503266968325792, "calib/std_conf": 0.30357006466972075, "calib/step_conf_rate": 0.80078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1571.0, "completions/max_terminated_length": 1571.0, "completions/mean_length": 366.625, "completions/mean_terminated_length": 369.5118103027344, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.0384, "grad_norm": 0.007062575314193964, "learning_rate": 4.555555555555556e-06, "loss": -0.0377, "num_tokens": 7951726.0, "reward": 0.5609018802642822, "reward_std": 0.5295820832252502, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.33417004346847534, "rewards/format_reward_step": 0.6875, "rewards/stepwise_brier_reward": 0.5033923387527466, "step": 36 }, { "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.5949754901960784, "calib/avg_num_step_conf": 3.35546875, "calib/ece": 0.5149847293577982, "calib/final_conf_rate": 0.8515625, "calib/format_rate": 0.6953125, "calib/frac_conf_gt_0.9": 0.3211009174311927, "calib/gap": 0.09309000098039222, "calib/mean_conf": 0.7101464633027524, "calib/mu_c": 0.7827395833333334, "calib/mu_w": 0.6896495823529412, "calib/nonempty_final_conf_rate": 0.8515625, "calib/nonempty_reasoning_rate": 0.94921875, "calib/nonempty_step_conf_rate": 0.80859375, "calib/pce": 0.5024738532110092, "calib/std_conf": 0.31330190768992244, "calib/step_conf_rate": 0.80859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1503.0, "completions/max_terminated_length": 1503.0, "completions/mean_length": 400.57421875, "completions/mean_terminated_length": 402.1451110839844, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.039466666666666664, "grad_norm": 0.007437742780894041, "learning_rate": 4.527777777777778e-06, "loss": -0.0125, "num_tokens": 8110937.0, "reward": 0.5722134113311768, "reward_std": 0.5186419486999512, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.3603786826133728, "rewards/format_reward_step": 0.6953125, "rewards/stepwise_brier_reward": 0.4930964410305023, "step": 37 }, { "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.5174500587544065, "calib/avg_num_step_conf": 3.9296875, "calib/ece": 0.5161535353535354, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.7734375, "calib/frac_conf_gt_0.9": 0.2813852813852814, "calib/gap": 0.024159365452408932, "calib/mean_conf": 0.6646008658008659, "calib/mu_c": 0.6839492753623189, "calib/mu_w": 0.6597899099099099, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.84375, "calib/pce": 0.4908101010101011, "calib/std_conf": 0.330237801166244, "calib/step_conf_rate": 0.84375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1700.0, "completions/max_terminated_length": 1700.0, "completions/mean_length": 400.09765625, "completions/mean_terminated_length": 400.09765625, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.04053333333333333, "grad_norm": 0.007288274820894003, "learning_rate": 4.5e-06, "loss": 0.0499, "num_tokens": 8269818.0, "reward": 0.6101577877998352, "reward_std": 0.4628285765647888, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.4065845012664795, "rewards/format_reward_step": 0.7734375, "rewards/stepwise_brier_reward": 0.5368371605873108, "step": 38 }, { "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.5635198135198135, "calib/avg_num_step_conf": 3.86328125, "calib/ece": 0.5480772151898735, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.828125, "calib/frac_conf_gt_0.9": 0.2742616033755274, "calib/gap": 0.08267072002071996, "calib/mean_conf": 0.7059675105485232, "calib/mu_c": 0.7750341880341879, "calib/mu_w": 0.6923634680134679, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.5447438818565402, "calib/std_conf": 0.30138604361317267, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 367.45703125, "completions/mean_terminated_length": 368.8980712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.0416, "grad_norm": 0.007424803916364908, "learning_rate": 4.472222222222223e-06, "loss": 0.0231, "num_tokens": 8419543.0, "reward": 0.5983622670173645, "reward_std": 0.46255725622177124, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.413524329662323, "rewards/format_reward_step": 0.828125, "rewards/stepwise_brier_reward": 0.6101503372192383, "step": 39 }, { "calib/answer_extract_rate": 0.91015625, "calib/auroc": 0.5634037015615964, "calib/avg_num_step_conf": 3.9921875, "calib/ece": 0.5317317045454546, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.78515625, "calib/frac_conf_gt_0.9": 0.2681818181818182, "calib/gap": 0.05356498698669743, "calib/mean_conf": 0.6880555681818182, "calib/mu_c": 0.7323684210526316, "calib/mu_w": 0.6788034340659341, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.88671875, "calib/pce": 0.52353, "calib/std_conf": 0.30208778456217783, "calib/step_conf_rate": 0.88671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2104.0, "completions/max_terminated_length": 2104.0, "completions/mean_length": 412.3828125, "completions/mean_terminated_length": 412.3828125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.042666666666666665, "grad_norm": 0.00619457196444273, "learning_rate": 4.444444444444444e-06, "loss": 0.0332, "num_tokens": 8581441.0, "reward": 0.5800082683563232, "reward_std": 0.4767323136329651, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.4071369767189026, "rewards/format_reward_step": 0.78515625, "rewards/stepwise_brier_reward": 0.5666965842247009, "step": 40 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5014776184267711, "calib/avg_num_step_conf": 4.3046875, "calib/ece": 0.44931404939256203, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.19421487603305784, "calib/gap": 0.0021802691825293374, "calib/mean_conf": 0.6237438018471075, "calib/mu_c": 0.6253384615384615, "calib/mu_w": 0.6231581923559322, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.4022314049586777, "calib/std_conf": 0.32450295148901953, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1259.0, "completions/max_terminated_length": 1259.0, "completions/mean_length": 369.44140625, "completions/mean_terminated_length": 370.8902282714844, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.04373333333333333, "grad_norm": 0.007128117140382528, "learning_rate": 4.416666666666667e-06, "loss": -0.0217, "num_tokens": 8732834.0, "reward": 0.7607706785202026, "reward_std": 0.4956275224685669, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.4987669587135315, "rewards/format_reward_step": 0.875, "rewards/stepwise_brier_reward": 0.6642985343933105, "step": 41 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5438486715440893, "calib/avg_num_step_conf": 4.0546875, "calib/ece": 0.37408041633186073, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.1285140562248996, "calib/gap": 0.04903052358507587, "calib/mean_conf": 0.57200946586332, "calib/mu_c": 0.6106037735849057, "calib/mu_w": 0.5615732499998298, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.36661923828634535, "calib/std_conf": 0.2964542103436392, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1379.0, "completions/max_terminated_length": 1379.0, "completions/mean_length": 351.52734375, "completions/mean_terminated_length": 352.9059143066406, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.0448, "grad_norm": 0.007647974416613579, "learning_rate": 4.388888888888889e-06, "loss": -0.0085, "num_tokens": 8876761.0, "reward": 0.7739803194999695, "reward_std": 0.38392341136932373, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.5873792767524719, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.7258501052856445, "step": 42 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5674390968508617, "calib/avg_num_step_conf": 4.23046875, "calib/ece": 0.3785844879302427, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.06827309236947791, "calib/gap": 0.06278427290293609, "calib/mean_conf": 0.5602712349181944, "calib/mu_c": 0.6101960784313727, "calib/mu_w": 0.5474118055284366, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.36701822287000174, "calib/std_conf": 0.2800565850887268, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1182.0, "completions/max_terminated_length": 1182.0, "completions/mean_length": 383.734375, "completions/mean_terminated_length": 385.2392272949219, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.04586666666666667, "grad_norm": 0.006165119353681803, "learning_rate": 4.361111111111112e-06, "loss": 0.0143, "num_tokens": 9029789.0, "reward": 0.7771089673042297, "reward_std": 0.40892308950424194, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.6033056974411011, "rewards/format_reward_step": 0.9296875, "rewards/stepwise_brier_reward": 0.733074426651001, "step": 43 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.525381414701803, "calib/avg_num_step_conf": 3.859375, "calib/ece": 0.3968818263847724, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.0954356846473029, "calib/gap": 0.023843388195407655, "calib/mean_conf": 0.5108190588157808, "calib/mu_c": 0.5311997142857143, "calib/mu_w": 0.5073563260903067, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.3812363347164591, "calib/std_conf": 0.30095908336175475, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2569.0, "completions/max_terminated_length": 2569.0, "completions/mean_length": 436.265625, "completions/mean_terminated_length": 436.265625, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.046933333333333334, "grad_norm": 0.0058610509149730206, "learning_rate": 4.333333333333334e-06, "loss": 0.0726, "num_tokens": 9197361.0, "reward": 0.711001992225647, "reward_std": 0.3738357722759247, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.6019132137298584, "rewards/format_reward_step": 0.90625, "rewards/stepwise_brier_reward": 0.7308066487312317, "step": 44 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5168803033038455, "calib/avg_num_step_conf": 4.140625, "calib/ece": 0.28319814602268856, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.03614457831325301, "calib/gap": 0.016902672306105537, "calib/mean_conf": 0.4587745447403636, "calib/mu_c": 0.47174004839685424, "calib/mu_w": 0.4548373760907487, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2545204819277108, "calib/std_conf": 0.2804498149052135, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2183.0, "completions/max_terminated_length": 2183.0, "completions/mean_length": 359.33984375, "completions/mean_terminated_length": 360.7490539550781, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.048, "grad_norm": 0.007440624758601189, "learning_rate": 4.305555555555556e-06, "loss": -0.0424, "num_tokens": 9343968.0, "reward": 0.8482845425605774, "reward_std": 0.3964950442314148, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.6546831130981445, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.8009593486785889, "step": 45 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4976728064963359, "calib/avg_num_step_conf": 3.98828125, "calib/ece": 0.2929223560910308, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.040160642570281124, "calib/gap": -0.003249653396712293, "calib/mean_conf": 0.4712115127175368, "calib/mu_c": 0.4686274509803921, "calib/mu_w": 0.4718771043771044, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.27965729585006693, "calib/std_conf": 0.273293412815169, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2378.0, "completions/max_terminated_length": 2378.0, "completions/mean_length": 398.00390625, "completions/mean_terminated_length": 398.00390625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "epoch": 0.04906666666666667, "grad_norm": 0.005799505393952131, "learning_rate": 4.277777777777778e-06, "loss": 0.0685, "num_tokens": 9500193.0, "reward": 0.8074619770050049, "reward_std": 0.390902578830719, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.6363450288772583, "rewards/format_reward_step": 0.921875, "rewards/stepwise_brier_reward": 0.7915327548980713, "step": 46 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5262777608761788, "calib/avg_num_step_conf": 3.765625, "calib/ece": 0.26358985943775104, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.025204860054761247, "calib/mean_conf": 0.42389608433734943, "calib/mu_c": 0.44140789473684217, "calib/mu_w": 0.4162030346820809, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.19113253012048192, "calib/std_conf": 0.2799290623121091, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1986.0, "completions/max_terminated_length": 1986.0, "completions/mean_length": 387.8515625, "completions/mean_terminated_length": 389.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.050133333333333335, "grad_norm": 0.00642318045720458, "learning_rate": 4.25e-06, "loss": 0.0441, "num_tokens": 9655027.0, "reward": 0.9242119789123535, "reward_std": 0.4017278254032135, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.6625082492828369, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.8093312978744507, "step": 47 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.49449906367041196, "calib/avg_num_step_conf": 3.640625, "calib/ece": 0.23256000000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": -0.015096285892634176, "calib/mean_conf": 0.36556800000000006, "calib/mu_c": 0.35481944444444447, "calib/mu_w": 0.36991573033707864, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.155064, "calib/std_conf": 0.26178398609540654, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1252.0, "completions/max_terminated_length": 1252.0, "completions/mean_length": 392.76953125, "completions/mean_terminated_length": 394.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.0512, "grad_norm": 0.0055298516526818275, "learning_rate": 4.222222222222223e-06, "loss": 0.0002, "num_tokens": 9808832.0, "reward": 0.9315296411514282, "reward_std": 0.38359177112579346, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.6787214875221252, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8452380299568176, "step": 48 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5096815834767642, "calib/avg_num_step_conf": 3.80859375, "calib/ece": 0.19657109666666667, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.004966152467010931, "calib/mean_conf": 0.32814890333333335, "calib/mu_c": 0.3314464285714286, "calib/mu_w": 0.32648027610441765, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.09436, "calib/std_conf": 0.24305451370381934, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2213.0, "completions/max_terminated_length": 2213.0, "completions/mean_length": 414.0703125, "completions/mean_terminated_length": 414.0703125, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.05226666666666667, "grad_norm": 0.005227451678365469, "learning_rate": 4.194444444444445e-06, "loss": -0.0036, "num_tokens": 9968938.0, "reward": 0.9855952858924866, "reward_std": 0.35189497470855713, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.688926637172699, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8535905480384827, "step": 49 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5057762005436424, "calib/avg_num_step_conf": 3.59375, "calib/ece": 0.20230240062828192, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": 0.015120879435672696, "calib/mean_conf": 0.306632977246855, "calib/mu_c": 0.31707792207792207, "calib/mu_w": 0.3019570426422494, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09984921504198614, "calib/std_conf": 0.24515109608261892, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 453.44921875, "completions/mean_terminated_length": 453.44921875, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.05333333333333334, "grad_norm": 0.005167771130800247, "learning_rate": 4.166666666666667e-06, "loss": 0.0105, "num_tokens": 10139949.0, "reward": 0.9716469645500183, "reward_std": 0.34850674867630005, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7119664549827576, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8720299005508423, "step": 50 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49304724880382783, "calib/avg_num_step_conf": 3.57421875, "calib/ece": 0.2159089748677248, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": -0.025391096989633155, "calib/mean_conf": 0.2811545171957672, "calib/mu_c": 0.263421052631579, "calib/mu_w": 0.28881214962121216, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.09773809523809524, "calib/std_conf": 0.23573949429671862, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1668.0, "completions/max_terminated_length": 1668.0, "completions/mean_length": 456.67578125, "completions/mean_terminated_length": 458.4667053222656, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0544, "grad_norm": 0.004893681034445763, "learning_rate": 4.138888888888889e-06, "loss": -0.0063, "num_tokens": 10315722.0, "reward": 0.9658059477806091, "reward_std": 0.32631462812423706, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7005055546760559, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8715876936912537, "step": 51 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5452707110241357, "calib/avg_num_step_conf": 3.24609375, "calib/ece": 0.22629946879150067, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.03927136333985651, "calib/mean_conf": 0.24937118193891103, "calib/mu_c": 0.27221428571428574, "calib/mu_w": 0.23294292237442923, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02867197875166003, "calib/std_conf": 0.2189799524439451, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1606.0, "completions/max_terminated_length": 1606.0, "completions/mean_length": 446.3203125, "completions/mean_terminated_length": 448.07061767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.055466666666666664, "grad_norm": 0.005250641144812107, "learning_rate": 4.111111111111111e-06, "loss": 0.0168, "num_tokens": 10487500.0, "reward": 1.0683614015579224, "reward_std": 0.3338378071784973, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6726921796798706, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8858733773231506, "step": 52 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5392747223337347, "calib/avg_num_step_conf": 3.7109375, "calib/ece": 0.21252964426877474, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.012974374414559092, "calib/mean_conf": 0.23881422924901186, "calib/mu_c": 0.246968085106383, "calib/mu_w": 0.2339937106918239, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.03990118577075099, "calib/std_conf": 0.2071521969058112, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 477.48046875, "completions/mean_terminated_length": 479.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.05653333333333333, "grad_norm": 0.004486790858209133, "learning_rate": 4.083333333333334e-06, "loss": -0.0155, "num_tokens": 10665127.0, "reward": 1.0359400510787964, "reward_std": 0.2993074357509613, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.696070671081543, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8750563859939575, "step": 53 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5692757534862798, "calib/avg_num_step_conf": 3.390625, "calib/ece": 0.273, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03727813122549964, "calib/mean_conf": 0.24348000000000003, "calib/mu_c": 0.2633119658119658, "calib/mu_w": 0.22603383458646617, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.024240000000000008, "calib/std_conf": 0.1941139861009505, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1695.0, "completions/max_terminated_length": 1695.0, "completions/mean_length": 416.78125, "completions/mean_terminated_length": 416.78125, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.0576, "grad_norm": 0.0053199599497020245, "learning_rate": 4.055555555555556e-06, "loss": -0.026, "num_tokens": 10827623.0, "reward": 1.0795798301696777, "reward_std": 0.3540028929710388, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6427512168884277, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.8250043392181396, "step": 54 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5376742160278746, "calib/avg_num_step_conf": 3.55078125, "calib/ece": 0.15664137500000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024256932346109178, "calib/mean_conf": 0.22969362499999998, "calib/mu_c": 0.24599428353658537, "calib/mu_w": 0.2217373511904762, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.02916750000000001, "calib/std_conf": 0.17513404744870606, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2119.0, "completions/max_terminated_length": 2119.0, "completions/mean_length": 459.09375, "completions/mean_terminated_length": 460.8941345214844, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.058666666666666666, "grad_norm": 0.005128195509314537, "learning_rate": 4.027777777777779e-06, "loss": 0.0063, "num_tokens": 11002543.0, "reward": 0.9987612962722778, "reward_std": 0.29905104637145996, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.7148557901382446, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.885645866394043, "step": 55 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.44665404040404033, "calib/avg_num_step_conf": 3.4609375, "calib/ece": 0.2070972, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.02996595117845119, "calib/mean_conf": 0.2167668, "calib/mu_c": 0.19734886363636364, "calib/mu_w": 0.22731481481481483, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.035932000000000006, "calib/std_conf": 0.17167271762793293, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 462.81640625, "completions/mean_terminated_length": 464.63140869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.05973333333333333, "grad_norm": 0.004981297068297863, "learning_rate": 4.000000000000001e-06, "loss": -0.0131, "num_tokens": 11177432.0, "reward": 1.016950011253357, "reward_std": 0.26780736446380615, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6897322535514832, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.9102107286453247, "step": 56 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4607095174518405, "calib/avg_num_step_conf": 3.52734375, "calib/ece": 0.30660039368897635, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.04047050037008071, "calib/mean_conf": 0.19673031497244098, "calib/mu_c": 0.17330841121495327, "calib/mu_w": 0.21377891158503398, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04103543307086615, "calib/std_conf": 0.18282944591609152, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2761.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 468.890625, "completions/mean_terminated_length": 468.890625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.0608, "grad_norm": 0.004824387840926647, "learning_rate": 3.972222222222223e-06, "loss": 0.0368, "num_tokens": 11353828.0, "reward": 1.0574250221252441, "reward_std": 0.3132486045360565, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6397988796234131, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8860392570495605, "step": 57 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5504757656853999, "calib/avg_num_step_conf": 3.39453125, "calib/ece": 0.15385889328063243, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025070755278025575, "calib/mean_conf": 0.19878932806324112, "calib/mu_c": 0.21632894736842104, "calib/mu_w": 0.19125819209039546, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.02612648221343874, "calib/std_conf": 0.16160948712497517, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1835.0, "completions/max_terminated_length": 1835.0, "completions/mean_length": 501.8984375, "completions/mean_terminated_length": 503.86669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.06186666666666667, "grad_norm": 0.004788158927112818, "learning_rate": 3.944444444444445e-06, "loss": 0.0162, "num_tokens": 11538202.0, "reward": 0.9966800808906555, "reward_std": 0.25661468505859375, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7510796785354614, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.903311014175415, "step": 58 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4838442144373673, "calib/avg_num_step_conf": 3.19921875, "calib/ece": 0.22699604743083004, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.010996549893842938, "calib/mean_conf": 0.21786561264822135, "calib/mu_c": 0.21104166666666666, "calib/mu_w": 0.2220382165605096, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.03270750988142294, "calib/std_conf": 0.16889137972547114, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2421.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 512.00390625, "completions/mean_terminated_length": 512.00390625, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.06293333333333333, "grad_norm": 0.00462004030123353, "learning_rate": 3.916666666666667e-06, "loss": 0.0134, "num_tokens": 11725091.0, "reward": 1.0441310405731201, "reward_std": 0.30270540714263916, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6886898279190063, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8913315534591675, "step": 59 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44270611607776356, "calib/avg_num_step_conf": 3.421875, "calib/ece": 0.1872972549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.024478309919114566, "calib/mean_conf": 0.20370901960784313, "calib/mu_c": 0.18700617283950616, "calib/mu_w": 0.21148448275862072, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03667960784313726, "calib/std_conf": 0.15014198703756337, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1614.0, "completions/max_terminated_length": 1614.0, "completions/mean_length": 505.86328125, "completions/mean_terminated_length": 507.8470764160156, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.064, "grad_norm": 0.005143808200955391, "learning_rate": 3.88888888888889e-06, "loss": 0.0168, "num_tokens": 11913016.0, "reward": 1.0055668354034424, "reward_std": 0.26768332719802856, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.7277531623840332, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9058230519294739, "step": 60 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5587888198757764, "calib/avg_num_step_conf": 3.06640625, "calib/ece": 0.26793215686274513, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027863478260869545, "calib/mean_conf": 0.20516588235294117, "calib/mu_c": 0.22046347826086954, "calib/mu_w": 0.1926, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.011058823529411767, "calib/std_conf": 0.15339603202139135, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1553.0, "completions/max_terminated_length": 1553.0, "completions/mean_length": 452.15234375, "completions/mean_terminated_length": 453.9255065917969, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.06506666666666666, "grad_norm": 0.005178578197956085, "learning_rate": 3.861111111111112e-06, "loss": 0.0304, "num_tokens": 12082399.0, "reward": 1.0984981060028076, "reward_std": 0.27050405740737915, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6574486494064331, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8775324821472168, "step": 61 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46932493118933793, "calib/avg_num_step_conf": 3.65234375, "calib/ece": 0.1972505882352941, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.021880399826162522, "calib/mean_conf": 0.1715337254901961, "calib/mu_c": 0.15634615384615388, "calib/mu_w": 0.1782265536723164, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03145098039215686, "calib/std_conf": 0.12772045408627028, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 521.60546875, "completions/mean_terminated_length": 523.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.06613333333333334, "grad_norm": 0.004727810621261597, "learning_rate": 3.833333333333334e-06, "loss": 0.0075, "num_tokens": 12272578.0, "reward": 0.9986793994903564, "reward_std": 0.2413763403892517, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7333876490592957, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9138795733451843, "step": 62 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5539006604292791, "calib/avg_num_step_conf": 3.53515625, "calib/ece": 0.21146000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0214522564667034, "calib/mean_conf": 0.17454000000000003, "calib/mu_c": 0.1880978260869566, "calib/mu_w": 0.1666455696202532, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.009, "calib/std_conf": 0.1228197394558383, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 543.06640625, "completions/mean_terminated_length": 543.06640625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.0672, "grad_norm": 0.005185308866202831, "learning_rate": 3.8055555555555556e-06, "loss": 0.0757, "num_tokens": 12469811.0, "reward": 1.0210875272750854, "reward_std": 0.3115884065628052, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6924421191215515, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8791531324386597, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5953980099502487, "calib/avg_num_step_conf": 3.4296875, "calib/ece": 0.29338188472440946, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04624901564179104, "calib/mean_conf": 0.18748425700787402, "calib/mu_c": 0.211883344, "calib/mu_w": 0.16563432835820896, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.00421259842519685, "calib/std_conf": 0.14253125720249474, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2424.0, "completions/max_terminated_length": 2424.0, "completions/mean_length": 503.75390625, "completions/mean_terminated_length": 503.75390625, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.06826666666666667, "grad_norm": 0.0047699278220534325, "learning_rate": 3.777777777777778e-06, "loss": 0.0626, "num_tokens": 12652116.0, "reward": 1.1166967153549194, "reward_std": 0.29311853647232056, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6578893661499023, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8853832483291626, "step": 64 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5494801694262611, "calib/avg_num_step_conf": 3.34765625, "calib/ece": 0.24388339920948615, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02732608137594661, "calib/mean_conf": 0.1766699604743083, "calib/mu_c": 0.19254716981132078, "calib/mu_w": 0.16522108843537417, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0007905138339920946, "calib/std_conf": 0.13015156399928848, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1157.0, "completions/max_terminated_length": 1157.0, "completions/mean_length": 454.59375, "completions/mean_terminated_length": 456.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.06933333333333333, "grad_norm": 0.005159405060112476, "learning_rate": 3.7500000000000005e-06, "loss": 0.0335, "num_tokens": 12823084.0, "reward": 1.0933313369750977, "reward_std": 0.24867603182792664, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6828031539916992, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9420938491821289, "step": 65 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6095026768103691, "calib/avg_num_step_conf": 3.59375, "calib/ece": 0.17389766551383395, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.05880450038743312, "calib/mean_conf": 0.1620707139328063, "calib/mu_c": 0.20135119047619052, "calib/mu_w": 0.1425466900887574, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0019762845849802375, "calib/std_conf": 0.13059729877194023, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2487.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 559.09375, "completions/mean_terminated_length": 559.09375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.0704, "grad_norm": 0.004662881605327129, "learning_rate": 3.7222222222222225e-06, "loss": 0.0442, "num_tokens": 13022132.0, "reward": 1.033068299293518, "reward_std": 0.19308070838451385, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.7494781017303467, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9098793864250183, "step": 66 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49861842105263154, "calib/avg_num_step_conf": 3.45703125, "calib/ece": 0.23254117647058822, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004150000000000015, "calib/mean_conf": 0.17020392156862746, "calib/mu_c": 0.1676, "calib/mu_w": 0.17175, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.015098039215686273, "calib/std_conf": 0.1263740696298785, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2020.0, "completions/max_terminated_length": 2020.0, "completions/mean_length": 520.7578125, "completions/mean_terminated_length": 520.7578125, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.07146666666666666, "grad_norm": 0.005110503640025854, "learning_rate": 3.694444444444445e-06, "loss": 0.0185, "num_tokens": 13210022.0, "reward": 1.0477032661437988, "reward_std": 0.2596127688884735, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6940404176712036, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9246066808700562, "step": 67 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5616442779643502, "calib/avg_num_step_conf": 3.41796875, "calib/ece": 0.23349609375000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02702337897184187, "calib/mean_conf": 0.17306640625000003, "calib/mu_c": 0.18974489795918367, "calib/mu_w": 0.1627215189873418, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011874999999999995, "calib/std_conf": 0.12009302824877037, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1186.0, "completions/max_terminated_length": 1186.0, "completions/mean_length": 466.29296875, "completions/mean_terminated_length": 468.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.07253333333333334, "grad_norm": 0.005158278625458479, "learning_rate": 3.6666666666666666e-06, "loss": 0.0324, "num_tokens": 13383049.0, "reward": 1.0714166164398193, "reward_std": 0.2271239459514618, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.7180866003036499, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9182429313659668, "step": 68 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47733602562317223, "calib/avg_num_step_conf": 3.62109375, "calib/ece": 0.1993470541007905, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010120581874042606, "calib/mean_conf": 0.15266875617588932, "calib/mu_c": 0.14598837209302326, "calib/mu_w": 0.15610895396706587, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006047430830039524, "calib/std_conf": 0.1012123059730517, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2079.0, "completions/max_terminated_length": 2079.0, "completions/mean_length": 551.28515625, "completions/mean_terminated_length": 553.4470825195312, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.0736, "grad_norm": 0.004912199918180704, "learning_rate": 3.638888888888889e-06, "loss": -0.0224, "num_tokens": 13578242.0, "reward": 1.0234334468841553, "reward_std": 0.21568524837493896, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.7155622243881226, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9251095652580261, "step": 69 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5060526315789473, "calib/avg_num_step_conf": 3.328125, "calib/ece": 0.258776862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.004752039473684216, "calib/mean_conf": 0.13808588235294117, "calib/mu_c": 0.1351042105263158, "calib/mu_w": 0.13985625000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01215686274509804, "calib/std_conf": 0.11369266672639318, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2276.0, "completions/max_terminated_length": 2276.0, "completions/mean_length": 519.53515625, "completions/mean_terminated_length": 519.53515625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.07466666666666667, "grad_norm": 0.004783127456903458, "learning_rate": 3.6111111111111115e-06, "loss": 0.0382, "num_tokens": 13767803.0, "reward": 1.0563266277313232, "reward_std": 0.21579626202583313, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6934038996696472, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9556857347488403, "step": 70 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5952913279132791, "calib/avg_num_step_conf": 3.37890625, "calib/ece": 0.2211625984251969, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027630000000000016, "calib/mean_conf": 0.14429015748031498, "calib/mu_c": 0.16213000000000002, "calib/mu_w": 0.1345, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.005561023622047244, "calib/std_conf": 0.11180132451347491, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2351.0, "completions/max_terminated_length": 2351.0, "completions/mean_length": 550.0703125, "completions/mean_terminated_length": 550.0703125, "completions/min_length": 212.0, "completions/min_terminated_length": 212.0, "epoch": 0.07573333333333333, "grad_norm": 0.0045869783498346806, "learning_rate": 3.5833333333333335e-06, "loss": 0.0494, "num_tokens": 13962597.0, "reward": 1.0412800312042236, "reward_std": 0.2335238754749298, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.7140564918518066, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9370072484016418, "step": 71 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5317827408484208, "calib/avg_num_step_conf": 3.4609375, "calib/ece": 0.25241993464052287, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038063851812695504, "calib/mean_conf": 0.13738398692810458, "calib/mu_c": 0.13978723404255322, "calib/mu_w": 0.13598084886128367, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.010588235294117648, "calib/std_conf": 0.10551586768304635, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2083.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 526.9375, "completions/mean_terminated_length": 529.0039672851562, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.0768, "grad_norm": 0.005466862581670284, "learning_rate": 3.555555555555556e-06, "loss": 0.0231, "num_tokens": 14151469.0, "reward": 1.0539318323135376, "reward_std": 0.21653851866722107, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6967944502830505, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9424508810043335, "step": 72 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5382015503875969, "calib/avg_num_step_conf": 4.1171875, "calib/ece": 0.37687499999999996, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010906279069767433, "calib/mean_conf": 0.1309990157480315, "calib/mu_c": 0.13636627906976745, "calib/mu_w": 0.12546000000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.08915514020302921, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2230.0, "completions/max_terminated_length": 2230.0, "completions/mean_length": 545.1328125, "completions/mean_terminated_length": 545.1328125, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 0.07786666666666667, "grad_norm": 0.004794351290911436, "learning_rate": 3.5277777777777784e-06, "loss": 0.0239, "num_tokens": 14347623.0, "reward": 1.1213903427124023, "reward_std": 0.25519895553588867, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5963953733444214, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8833957314491272, "step": 73 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5284992784992785, "calib/avg_num_step_conf": 3.93359375, "calib/ece": 0.28555335968379447, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007283549783549753, "calib/mean_conf": 0.13041501976284586, "calib/mu_c": 0.13484848484848483, "calib/mu_w": 0.12756493506493508, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.01233201581027668, "calib/std_conf": 0.10675748111681319, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2497.0, "completions/max_terminated_length": 2497.0, "completions/mean_length": 557.109375, "completions/mean_terminated_length": 559.2941284179688, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.07893333333333333, "grad_norm": 0.005661599803715944, "learning_rate": 3.5e-06, "loss": 0.0405, "num_tokens": 14543739.0, "reward": 1.0442932844161987, "reward_std": 0.20066368579864502, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6766639351844788, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.884782612323761, "step": 74 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5533194759385236, "calib/avg_num_step_conf": 3.90234375, "calib/ece": 0.45474509803921576, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011798941798941781, "calib/mean_conf": 0.1264313725490196, "calib/mu_c": 0.13142857142857142, "calib/mu_w": 0.11962962962962964, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002352941176470588, "calib/std_conf": 0.10029471373905237, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2615.0, "completions/max_terminated_length": 2615.0, "completions/mean_length": 522.5234375, "completions/mean_terminated_length": 522.5234375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.08, "grad_norm": 0.004576115868985653, "learning_rate": 3.4722222222222224e-06, "loss": 0.0375, "num_tokens": 14731825.0, "reward": 1.1616411209106445, "reward_std": 0.2694687843322754, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5390676259994507, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8762416839599609, "step": 75 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5334039276087793, "calib/avg_num_step_conf": 3.875, "calib/ece": 0.3042391304347827, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013539500705942745, "calib/mean_conf": 0.11512845849802372, "calib/mu_c": 0.12299528301886792, "calib/mu_w": 0.10945578231292517, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00019762845849802388, "calib/std_conf": 0.08684471342230583, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1572.0, "completions/max_terminated_length": 1572.0, "completions/mean_length": 534.0390625, "completions/mean_terminated_length": 536.1333618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.08106666666666666, "grad_norm": 0.004821740090847015, "learning_rate": 3.444444444444445e-06, "loss": 0.001, "num_tokens": 14921163.0, "reward": 1.0707972049713135, "reward_std": 0.20763981342315674, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6516541838645935, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8986301422119141, "step": 76 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.46450094161958566, "calib/avg_num_step_conf": 4.3046875, "calib/ece": 0.355094466403162, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002990991839296908, "calib/mean_conf": 0.11130869565217393, "calib/mu_c": 0.10971271186440679, "calib/mu_w": 0.1127037037037037, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.07664856665480077, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2622.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 579.21875, "completions/mean_terminated_length": 579.21875, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 0.08213333333333334, "grad_norm": 0.004549305886030197, "learning_rate": 3.416666666666667e-06, "loss": 0.0758, "num_tokens": 15123675.0, "reward": 1.0918402671813965, "reward_std": 0.24333447217941284, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6090283393859863, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9118044376373291, "step": 77 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6373342682304947, "calib/avg_num_step_conf": 3.9609375, "calib/ece": 0.2896259842519685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04521098929117799, "calib/mean_conf": 0.12809055118110235, "calib/mu_c": 0.15443396226415096, "calib/mu_w": 0.10922297297297297, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00019685039370078648, "calib/std_conf": 0.09555677945439664, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1615.0, "completions/max_terminated_length": 1615.0, "completions/mean_length": 598.26953125, "completions/mean_terminated_length": 600.61572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.0832, "grad_norm": 0.004515916109085083, "learning_rate": 3.3888888888888893e-06, "loss": 0.0004, "num_tokens": 15334424.0, "reward": 1.0832490921020508, "reward_std": 0.24944014847278595, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6785241365432739, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9243856072425842, "step": 78 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5331442974165091, "calib/avg_num_step_conf": 3.9609375, "calib/ece": 0.3256837944664031, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01637101449275366, "calib/mean_conf": 0.1288616600790514, "calib/mu_c": 0.13779130434782613, "calib/mu_w": 0.12142028985507247, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.0951704142028216, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2653.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 621.0, "completions/mean_terminated_length": 621.0, "completions/min_length": 254.0, "completions/min_terminated_length": 254.0, "epoch": 0.08426666666666667, "grad_norm": 0.004430029075592756, "learning_rate": 3.3611111111111117e-06, "loss": 0.0133, "num_tokens": 15549344.0, "reward": 1.086816668510437, "reward_std": 0.22014813125133514, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6336302161216736, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8893812894821167, "step": 79 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6031269543464667, "calib/avg_num_step_conf": 4.11328125, "calib/ece": 0.35733201581027657, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03266010006253911, "calib/mean_conf": 0.1347628458498024, "calib/mu_c": 0.1515447154471545, "calib/mu_w": 0.1188846153846154, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0029644268774703555, "calib/std_conf": 0.11397568860024991, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1785.0, "completions/max_terminated_length": 1785.0, "completions/mean_length": 584.390625, "completions/mean_terminated_length": 586.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.08533333333333333, "grad_norm": 0.004645652137696743, "learning_rate": 3.3333333333333333e-06, "loss": -0.0148, "num_tokens": 15750676.0, "reward": 1.1073707342147827, "reward_std": 0.2834372818470001, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6226511001586914, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8669931888580322, "step": 80 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5599360000000001, "calib/avg_num_step_conf": 3.93359375, "calib/ece": 0.38594, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019559999999999994, "calib/mean_conf": 0.12206, "calib/mu_c": 0.13184, "calib/mu_w": 0.11228000000000002, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.004, "calib/std_conf": 0.09105743462233054, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 621.578125, "completions/mean_terminated_length": 621.578125, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 0.0864, "grad_norm": 0.004663328640162945, "learning_rate": 3.3055555555555558e-06, "loss": 0.0415, "num_tokens": 15965616.0, "reward": 1.1113719940185547, "reward_std": 0.2623317241668701, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5905662775039673, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9065430164337158, "step": 81 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4687655086848635, "calib/avg_num_step_conf": 4.40234375, "calib/ece": 0.37704724409448825, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0186637717121588, "calib/mean_conf": 0.15649606299212598, "calib/mu_c": 0.1473846153846154, "calib/mu_w": 0.1660483870967742, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.010866141732283464, "calib/std_conf": 0.12047774806899222, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2162.0, "completions/max_terminated_length": 2162.0, "completions/mean_length": 558.61328125, "completions/mean_terminated_length": 560.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.08746666666666666, "grad_norm": 0.004946217872202396, "learning_rate": 3.277777777777778e-06, "loss": 0.0335, "num_tokens": 16163741.0, "reward": 1.124467134475708, "reward_std": 0.23027293384075165, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5953612923622131, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8790208697319031, "step": 82 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47997747465899127, "calib/avg_num_step_conf": 4.56640625, "calib/ece": 0.35001976284584974, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004486297084219715, "calib/mean_conf": 0.13535573122529646, "calib/mu_c": 0.13303278688524595, "calib/mu_w": 0.13751908396946566, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0015810276679841893, "calib/std_conf": 0.09613412863544835, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1532.0, "completions/max_terminated_length": 1532.0, "completions/mean_length": 645.3515625, "completions/mean_terminated_length": 647.8823852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 258.0, "epoch": 0.08853333333333334, "grad_norm": 0.00416317256167531, "learning_rate": 3.2500000000000002e-06, "loss": 0.0055, "num_tokens": 16385783.0, "reward": 1.1054625511169434, "reward_std": 0.22977739572525024, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6112756729125977, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8977366089820862, "step": 83 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5805962274106319, "calib/avg_num_step_conf": 4.79296875, "calib/ece": 0.34179693825910934, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.022164904448621583, "calib/mean_conf": 0.12379010627530367, "calib/mu_c": 0.13572505482456143, "calib/mu_w": 0.11356015037593985, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.0020242914979757085, "calib/std_conf": 0.0850828291266304, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 624.4375, "completions/mean_terminated_length": 624.4375, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.0896, "grad_norm": 0.004852856043726206, "learning_rate": 3.2222222222222227e-06, "loss": 0.0124, "num_tokens": 16601127.0, "reward": 1.0600621700286865, "reward_std": 0.2795252203941345, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6110243201255798, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8541374206542969, "step": 84 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5338028169014084, "calib/avg_num_step_conf": 4.98046875, "calib/ece": 0.3056746031746032, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011335467349551875, "calib/mean_conf": 0.1379761904761905, "calib/mu_c": 0.1443636363636364, "calib/mu_w": 0.1330281690140845, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.003571428571428572, "calib/std_conf": 0.09549926986222296, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2946.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 683.171875, "completions/mean_terminated_length": 683.171875, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.09066666666666667, "grad_norm": 0.004316552076488733, "learning_rate": 3.1944444444444443e-06, "loss": 0.0438, "num_tokens": 16833411.0, "reward": 1.0729950666427612, "reward_std": 0.2497747838497162, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6418659687042236, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9004353284835815, "step": 85 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5559251866637096, "calib/avg_num_step_conf": 5.08984375, "calib/ece": 0.20278790322580645, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014732268795741812, "calib/mean_conf": 0.12785725806451617, "calib/mu_c": 0.13777777777777775, "calib/mu_w": 0.12304550898203594, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.002016129032258065, "calib/std_conf": 0.08425807386735486, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 703.96875, "completions/mean_terminated_length": 703.96875, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.09173333333333333, "grad_norm": 0.004142653662711382, "learning_rate": 3.1666666666666667e-06, "loss": 0.0529, "num_tokens": 17068707.0, "reward": 0.9890156388282776, "reward_std": 0.24995741248130798, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.7129498720169067, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.878600001335144, "step": 86 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49857884330202673, "calib/avg_num_step_conf": 4.72265625, "calib/ece": 0.41129411764705887, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015126050420167791, "calib/mean_conf": 0.12517647058823528, "calib/mu_c": 0.12588235294117645, "calib/mu_w": 0.12436974789915967, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0015686274509803923, "calib/std_conf": 0.07816440839975973, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2935.0, "completions/max_terminated_length": 2935.0, "completions/mean_length": 623.95703125, "completions/mean_terminated_length": 623.95703125, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.0928, "grad_norm": 0.004567314404994249, "learning_rate": 3.138888888888889e-06, "loss": 0.055, "num_tokens": 17283504.0, "reward": 1.1310455799102783, "reward_std": 0.22217154502868652, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.576745331287384, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8488165140151978, "step": 87 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5628170233824074, "calib/avg_num_step_conf": 5.11328125, "calib/ece": 0.3302270588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02023581590993445, "calib/mean_conf": 0.13251803921568628, "calib/mu_c": 0.1433898305084746, "calib/mu_w": 0.12315401459854014, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.09096086590159028, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2420.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 699.09765625, "completions/mean_terminated_length": 701.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.09386666666666667, "grad_norm": 0.004271373618394136, "learning_rate": 3.1111111111111116e-06, "loss": 0.0154, "num_tokens": 17521889.0, "reward": 1.103813648223877, "reward_std": 0.25739848613739014, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.641609787940979, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8898475766181946, "step": 88 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5308606661581489, "calib/avg_num_step_conf": 5.8203125, "calib/ece": 0.32140873015873017, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005291762013729995, "calib/mean_conf": 0.1373214285714286, "calib/mu_c": 0.14021929824561405, "calib/mu_w": 0.13492753623188405, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0031746031746031746, "calib/std_conf": 0.07739860146714729, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2675.0, "completions/max_terminated_length": 2675.0, "completions/mean_length": 706.41015625, "completions/mean_terminated_length": 711.972412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.09493333333333333, "grad_norm": 0.004390403628349304, "learning_rate": 3.0833333333333336e-06, "loss": -0.0031, "num_tokens": 17761186.0, "reward": 1.0748932361602783, "reward_std": 0.20946909487247467, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6357358694076538, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8546633720397949, "step": 89 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5287195727769498, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.3502755905511812, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01585444610034774, "calib/mean_conf": 0.13003937007874017, "calib/mu_c": 0.13827868852459016, "calib/mu_w": 0.12242424242424242, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0737333887952076, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2627.0, "completions/max_terminated_length": 2627.0, "completions/mean_length": 691.5, "completions/mean_terminated_length": 694.2117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.096, "grad_norm": 0.0048269531689584255, "learning_rate": 3.055555555555556e-06, "loss": 0.015, "num_tokens": 17991098.0, "reward": 1.1092267036437988, "reward_std": 0.26769447326660156, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6252496242523193, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8832827806472778, "step": 90 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.53466796875, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.36828124999999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008437500000000014, "calib/mean_conf": 0.13171875000000002, "calib/mu_c": 0.13593750000000004, "calib/mu_w": 0.12750000000000003, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.08170650003786419, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1779.0, "completions/max_terminated_length": 1779.0, "completions/mean_length": 659.5, "completions/mean_terminated_length": 662.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 305.0, "epoch": 0.09706666666666666, "grad_norm": 0.004437569063156843, "learning_rate": 3.0277777777777776e-06, "loss": 0.0352, "num_tokens": 18217210.0, "reward": 1.127094030380249, "reward_std": 0.231601744890213, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6119117736816406, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8845522999763489, "step": 91 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5619130004943153, "calib/avg_num_step_conf": 5.7890625, "calib/ece": 0.3348549019607844, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01652415966386564, "calib/mean_conf": 0.13338039215686276, "calib/mu_c": 0.14219327731092443, "calib/mu_w": 0.1256691176470588, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0007843137254901972, "calib/std_conf": 0.08609361302540265, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 688.22265625, "completions/mean_terminated_length": 688.22265625, "completions/min_length": 231.0, "completions/min_terminated_length": 231.0, "epoch": 0.09813333333333334, "grad_norm": 0.004576823674142361, "learning_rate": 3e-06, "loss": 0.0586, "num_tokens": 18449683.0, "reward": 1.1030268669128418, "reward_std": 0.23424963653087616, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6383413076400757, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8776121139526367, "step": 92 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5508788159111934, "calib/avg_num_step_conf": 5.48046875, "calib/ece": 0.326875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011560900400863383, "calib/mean_conf": 0.13132812500000002, "calib/mu_c": 0.13769565217391302, "calib/mu_w": 0.12613475177304964, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0044921875, "calib/std_conf": 0.08498847434496265, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2590.0, "completions/max_terminated_length": 2590.0, "completions/mean_length": 739.68359375, "completions/mean_terminated_length": 742.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 274.0, "epoch": 0.0992, "grad_norm": 0.00454547256231308, "learning_rate": 2.9722222222222225e-06, "loss": 0.0572, "num_tokens": 18694386.0, "reward": 1.091695785522461, "reward_std": 0.24779178202152252, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6422584056854248, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.888515830039978, "step": 93 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5424498746867168, "calib/avg_num_step_conf": 5.17578125, "calib/ece": 0.33972332015810286, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004371553884711804, "calib/mean_conf": 0.14011857707509884, "calib/mu_c": 0.1424166666666667, "calib/mu_w": 0.1380451127819549, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0027667984189723317, "calib/std_conf": 0.08460782447972928, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2287.0, "completions/max_terminated_length": 2287.0, "completions/mean_length": 650.19140625, "completions/mean_terminated_length": 652.7412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 270.0, "epoch": 0.10026666666666667, "grad_norm": 0.00542377820238471, "learning_rate": 2.944444444444445e-06, "loss": -0.0067, "num_tokens": 18919083.0, "reward": 1.104982852935791, "reward_std": 0.2577821612358093, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6229070425033569, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9053668975830078, "step": 94 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48009672619047616, "calib/avg_num_step_conf": 5.37109375, "calib/ece": 0.43421874999999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002460317460317496, "calib/mean_conf": 0.1321875, "calib/mu_c": 0.13111111111111112, "calib/mu_w": 0.13357142857142862, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001953125, "calib/std_conf": 0.07958326516140188, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1770.0, "completions/max_terminated_length": 1770.0, "completions/mean_length": 678.62109375, "completions/mean_terminated_length": 681.2824096679688, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.10133333333333333, "grad_norm": 0.005023401230573654, "learning_rate": 2.916666666666667e-06, "loss": 0.0135, "num_tokens": 19148506.0, "reward": 1.1614655256271362, "reward_std": 0.2195737361907959, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5604507923126221, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.876522958278656, "step": 95 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5722301970507635, "calib/avg_num_step_conf": 5.54296875, "calib/ece": 0.4820352941176472, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02534921049197443, "calib/mean_conf": 0.13757254901960786, "calib/mu_c": 0.14721518987341775, "calib/mu_w": 0.12186597938144332, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.08083918696031313, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1962.0, "completions/max_terminated_length": 1962.0, "completions/mean_length": 673.98046875, "completions/mean_terminated_length": 676.6235961914062, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.1024, "grad_norm": 0.0050019919872283936, "learning_rate": 2.888888888888889e-06, "loss": 0.0301, "num_tokens": 19376429.0, "reward": 1.175472617149353, "reward_std": 0.20971368253231049, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.5313574075698853, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7735506296157837, "step": 96 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5379130324059338, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.3053571428571428, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01376392691156808, "calib/mean_conf": 0.1509920634920635, "calib/mu_c": 0.1585840707964602, "calib/mu_w": 0.14482014388489212, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003968253968253968, "calib/std_conf": 0.0924930710430792, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 718.74609375, "completions/mean_terminated_length": 718.74609375, "completions/min_length": 262.0, "completions/min_terminated_length": 262.0, "epoch": 0.10346666666666667, "grad_norm": 0.00447913957759738, "learning_rate": 2.861111111111111e-06, "loss": 0.0821, "num_tokens": 19615068.0, "reward": 1.0893038511276245, "reward_std": 0.29794251918792725, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6482378840446472, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9029267430305481, "step": 97 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5587749862461031, "calib/avg_num_step_conf": 5.55078125, "calib/ece": 0.33480468750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01786233877376367, "calib/mean_conf": 0.16128906250000002, "calib/mu_c": 0.17056910569105693, "calib/mu_w": 0.15270676691729326, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0078125, "calib/std_conf": 0.09623020513524376, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2347.0, "completions/max_terminated_length": 2347.0, "completions/mean_length": 712.3515625, "completions/mean_terminated_length": 715.1451416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.10453333333333334, "grad_norm": 0.0045817699283361435, "learning_rate": 2.8333333333333335e-06, "loss": -0.0015, "num_tokens": 19853182.0, "reward": 1.1275382041931152, "reward_std": 0.23887893557548523, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6481631398200989, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8919514417648315, "step": 98 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5609039548022599, "calib/avg_num_step_conf": 5.8671875, "calib/ece": 0.14642857142857146, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024311864406779626, "calib/mean_conf": 0.1511904761904762, "calib/mu_c": 0.16826666666666665, "calib/mu_w": 0.14395480225988702, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09621905516041881, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2619.0, "completions/max_terminated_length": 2619.0, "completions/mean_length": 799.125, "completions/mean_terminated_length": 802.2588500976562, "completions/min_length": 0.0, "completions/min_terminated_length": 273.0, "epoch": 0.1056, "grad_norm": 0.005405294243246317, "learning_rate": 2.805555555555556e-06, "loss": 0.0019, "num_tokens": 20113126.0, "reward": 0.9942867755889893, "reward_std": 0.25218844413757324, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7545179128646851, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9040484428405762, "step": 99 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6169905956112852, "calib/avg_num_step_conf": 5.0703125, "calib/ece": 0.27356862745098043, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03631661442006276, "calib/mean_conf": 0.15780392156862746, "calib/mu_c": 0.1784545454545455, "calib/mu_w": 0.14213793103448275, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.08920498966257286, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2284.0, "completions/max_terminated_length": 2284.0, "completions/mean_length": 762.2734375, "completions/mean_terminated_length": 765.2628173828125, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.10666666666666667, "grad_norm": 0.004995453171432018, "learning_rate": 2.7777777777777783e-06, "loss": 0.0087, "num_tokens": 20365244.0, "reward": 1.1066558361053467, "reward_std": 0.24776574969291687, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6870343685150146, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9353673458099365, "step": 100 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5344317556411667, "calib/avg_num_step_conf": 5.93359375, "calib/ece": 0.22607999999999998, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007755916345624669, "calib/mean_conf": 0.14912, "calib/mu_c": 0.1540217391304348, "calib/mu_w": 0.14626582278481012, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0036, "calib/std_conf": 0.0820927865284155, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 833.26171875, "completions/mean_terminated_length": 839.8228149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 267.0, "epoch": 0.10773333333333333, "grad_norm": 0.0041248612105846405, "learning_rate": 2.7500000000000004e-06, "loss": 0.0384, "num_tokens": 20635119.0, "reward": 1.0370362997055054, "reward_std": 0.2543414235115051, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6995937824249268, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9208325147628784, "step": 101 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6121687196715193, "calib/avg_num_step_conf": 5.38671875, "calib/ece": 0.3730196078431373, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04410041060097053, "calib/mean_conf": 0.18149019607843137, "calib/mu_c": 0.2012056737588653, "calib/mu_w": 0.15710526315789478, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0007843137254901959, "calib/std_conf": 0.1105180976305832, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2366.0, "completions/max_terminated_length": 2366.0, "completions/mean_length": 664.58203125, "completions/mean_terminated_length": 667.1882934570312, "completions/min_length": 0.0, "completions/min_terminated_length": 260.0, "epoch": 0.1088, "grad_norm": 0.004917818587273359, "learning_rate": 2.7222222222222224e-06, "loss": 0.05, "num_tokens": 20861516.0, "reward": 1.1818873882293701, "reward_std": 0.2297108769416809, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6212344169616699, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8850804567337036, "step": 102 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5530331813576495, "calib/avg_num_step_conf": 5.1171875, "calib/ece": 0.27948616600790516, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015181737588652489, "calib/mean_conf": 0.17466403162055336, "calib/mu_c": 0.183125, "calib/mu_w": 0.16794326241134752, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.005731225296442687, "calib/std_conf": 0.11337608273840528, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 854.40625, "completions/mean_terminated_length": 854.40625, "completions/min_length": 254.0, "completions/min_terminated_length": 254.0, "epoch": 0.10986666666666667, "grad_norm": 0.00374817568808794, "learning_rate": 2.6944444444444444e-06, "loss": -0.0022, "num_tokens": 21134364.0, "reward": 1.086442470550537, "reward_std": 0.2338075339794159, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6681621074676514, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8641331791877747, "step": 103 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6497222742307932, "calib/avg_num_step_conf": 5.66796875, "calib/ece": 0.2608203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04983648505273672, "calib/mean_conf": 0.16496093750000004, "calib/mu_c": 0.19357798165137619, "calib/mu_w": 0.14374149659863947, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.1012692807524626, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1872.0, "completions/max_terminated_length": 1872.0, "completions/mean_length": 763.73046875, "completions/mean_terminated_length": 766.7255249023438, "completions/min_length": 0.0, "completions/min_terminated_length": 247.0, "epoch": 0.11093333333333333, "grad_norm": 0.005013539455831051, "learning_rate": 2.666666666666667e-06, "loss": 0.0487, "num_tokens": 21386127.0, "reward": 1.1125118732452393, "reward_std": 0.21147343516349792, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6977277398109436, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9530295133590698, "step": 104 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6449192782526115, "calib/avg_num_step_conf": 6.33984375, "calib/ece": 0.3046031746031746, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0462222222222223, "calib/mean_conf": 0.15968253968253973, "calib/mu_c": 0.1844444444444445, "calib/mu_w": 0.1382222222222222, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09222502249662178, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2985.0, "completions/max_terminated_length": 2985.0, "completions/mean_length": 848.3359375, "completions/mean_terminated_length": 848.3359375, "completions/min_length": 262.0, "completions/min_terminated_length": 262.0, "epoch": 0.112, "grad_norm": 0.003992477897554636, "learning_rate": 2.6388888888888893e-06, "loss": 0.077, "num_tokens": 21658629.0, "reward": 1.1046290397644043, "reward_std": 0.2945854067802429, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6547304391860962, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8903048038482666, "step": 105 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.555663086913087, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.2707188235294118, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0230074987512488, "calib/mean_conf": 0.16849686274509806, "calib/mu_c": 0.18139910714285717, "calib/mu_w": 0.15839160839160837, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.09524343912333272, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1927.0, "completions/max_terminated_length": 1927.0, "completions/mean_length": 709.49609375, "completions/mean_terminated_length": 712.2785034179688, "completions/min_length": 0.0, "completions/min_terminated_length": 298.0, "epoch": 0.11306666666666666, "grad_norm": 0.004593702964484692, "learning_rate": 2.6111111111111113e-06, "loss": 0.0054, "num_tokens": 21894412.0, "reward": 1.1057837009429932, "reward_std": 0.20206966996192932, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6796209812164307, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9170175790786743, "step": 106 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5216284987277354, "calib/avg_num_step_conf": 7.56640625, "calib/ece": 0.36422310756972115, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006578880407124649, "calib/mean_conf": 0.16326693227091635, "calib/mu_c": 0.166412213740458, "calib/mu_w": 0.15983333333333336, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0027888446215139444, "calib/std_conf": 0.100026313580564, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2786.0, "completions/max_terminated_length": 2786.0, "completions/mean_length": 759.50390625, "completions/mean_terminated_length": 768.5098876953125, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.11413333333333334, "grad_norm": 0.004667887929826975, "learning_rate": 2.5833333333333337e-06, "loss": 0.0521, "num_tokens": 22143029.0, "reward": 1.139082431793213, "reward_std": 0.27158012986183167, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6031172275543213, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9110323190689087, "step": 107 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6322460391425909, "calib/avg_num_step_conf": 6.06640625, "calib/ece": 0.40083984375000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04283659521590555, "calib/mean_conf": 0.17025390625000003, "calib/mu_c": 0.18882758620689655, "calib/mu_w": 0.145990990990991, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00234375, "calib/std_conf": 0.09857398268745261, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 787.421875, "completions/mean_terminated_length": 790.5098266601562, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.1152, "grad_norm": 0.0049973903223872185, "learning_rate": 2.5555555555555557e-06, "loss": 0.0653, "num_tokens": 22397409.0, "reward": 1.1757099628448486, "reward_std": 0.2627198398113251, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6049295663833618, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8289180994033813, "step": 108 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6331740442655936, "calib/avg_num_step_conf": 7.9453125, "calib/ece": 0.2698031496062992, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.044227867203219334, "calib/mean_conf": 0.1829527559055118, "calib/mu_c": 0.20767857142857146, "calib/mu_w": 0.16345070422535213, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005905511811023621, "calib/std_conf": 0.11584821445157273, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2828.0, "completions/max_terminated_length": 2828.0, "completions/mean_length": 799.94140625, "completions/mean_terminated_length": 803.0784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 326.0, "epoch": 0.11626666666666667, "grad_norm": 0.004739825148135424, "learning_rate": 2.5277777777777778e-06, "loss": 0.0353, "num_tokens": 22656362.0, "reward": 1.1025824546813965, "reward_std": 0.17916500568389893, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6898800134658813, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8836942911148071, "step": 109 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6187011841773747, "calib/avg_num_step_conf": 6.78515625, "calib/ece": 0.2365490196078431, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03931783824640972, "calib/mean_conf": 0.19168627450980394, "calib/mu_c": 0.21435185185185188, "calib/mu_w": 0.17503401360544216, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002352941176470588, "calib/std_conf": 0.10279190671507521, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2902.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 804.8203125, "completions/mean_terminated_length": 804.8203125, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 0.11733333333333333, "grad_norm": 0.004314524121582508, "learning_rate": 2.5e-06, "loss": 0.0856, "num_tokens": 22916884.0, "reward": 1.1057047843933105, "reward_std": 0.20166802406311035, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.7079530954360962, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9209755063056946, "step": 110 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6701754385964913, "calib/avg_num_step_conf": 6.2890625, "calib/ece": 0.26134387351778654, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06722117794486221, "calib/mean_conf": 0.22007905138339923, "calib/mu_c": 0.25541666666666674, "calib/mu_w": 0.18819548872180453, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0035573122529644263, "calib/std_conf": 0.1224325000603879, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 760.046875, "completions/mean_terminated_length": 760.046875, "completions/min_length": 205.0, "completions/min_terminated_length": 205.0, "epoch": 0.1184, "grad_norm": 0.004434129223227501, "learning_rate": 2.4722222222222226e-06, "loss": 0.0852, "num_tokens": 23168432.0, "reward": 1.1298407316207886, "reward_std": 0.24583680927753448, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6888812780380249, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.874413013458252, "step": 111 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6314165042235218, "calib/avg_num_step_conf": 6.18359375, "calib/ece": 0.24520783132530122, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0531809941520468, "calib/mean_conf": 0.2205090361445783, "calib/mu_c": 0.2493421052631579, "calib/mu_w": 0.1961611111111111, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.003942771084337349, "calib/std_conf": 0.11670969875640129, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3055.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 814.1015625, "completions/mean_terminated_length": 817.294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 314.0, "epoch": 0.11946666666666667, "grad_norm": 0.005339854396879673, "learning_rate": 2.4444444444444447e-06, "loss": 0.0667, "num_tokens": 23434330.0, "reward": 1.1077545881271362, "reward_std": 0.287481427192688, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6859410405158997, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8903862237930298, "step": 112 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6017952755905511, "calib/avg_num_step_conf": 6.5625, "calib/ece": 0.2574603174603175, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0441070866141732, "calib/mean_conf": 0.2385714285714286, "calib/mu_c": 0.26080000000000003, "calib/mu_w": 0.21669291338582683, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.12332184171504629, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 757.75390625, "completions/mean_terminated_length": 757.75390625, "completions/min_length": 263.0, "completions/min_terminated_length": 263.0, "epoch": 0.12053333333333334, "grad_norm": 0.004490803461521864, "learning_rate": 2.4166666666666667e-06, "loss": 0.0719, "num_tokens": 23683083.0, "reward": 1.1554641723632812, "reward_std": 0.30773234367370605, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6797835826873779, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9154144525527954, "step": 113 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6531410256410257, "calib/avg_num_step_conf": 5.98828125, "calib/ece": 0.3508267716535433, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06517307692307692, "calib/mean_conf": 0.24531496062992128, "calib/mu_c": 0.27199999999999996, "calib/mu_w": 0.20682692307692305, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00279527559055118, "calib/std_conf": 0.13760500361641664, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2935.0, "completions/max_terminated_length": 2935.0, "completions/mean_length": 695.41796875, "completions/mean_terminated_length": 698.1451416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 228.0, "epoch": 0.1216, "grad_norm": 0.005772326607257128, "learning_rate": 2.388888888888889e-06, "loss": 0.0561, "num_tokens": 23915702.0, "reward": 1.2348957061767578, "reward_std": 0.20500947535037994, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6465035080909729, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9059506058692932, "step": 114 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6228125000000001, "calib/avg_num_step_conf": 5.76953125, "calib/ece": 0.25059288537549407, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05632812500000001, "calib/mean_conf": 0.25849802371541497, "calib/mu_c": 0.286328125, "calib/mu_w": 0.22999999999999998, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0015810276679841893, "calib/std_conf": 0.13231339681351792, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2939.0, "completions/max_terminated_length": 2939.0, "completions/mean_length": 753.23046875, "completions/mean_terminated_length": 756.1843872070312, "completions/min_length": 0.0, "completions/min_terminated_length": 290.0, "epoch": 0.12266666666666666, "grad_norm": 0.0052672624588012695, "learning_rate": 2.361111111111111e-06, "loss": 0.0282, "num_tokens": 24163361.0, "reward": 1.1711812019348145, "reward_std": 0.27813780307769775, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6912695169448853, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.906873345375061, "step": 115 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6094626094626096, "calib/avg_num_step_conf": 6.33984375, "calib/ece": 0.18622047244094492, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.052064512064512014, "calib/mean_conf": 0.26897637795275586, "calib/mu_c": 0.2982882882882883, "calib/mu_w": 0.24622377622377628, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009094488188976377, "calib/std_conf": 0.1398668356883198, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 759.921875, "completions/mean_terminated_length": 765.905517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.12373333333333333, "grad_norm": 0.00614529475569725, "learning_rate": 2.3333333333333336e-06, "loss": 0.0101, "num_tokens": 24411989.0, "reward": 1.118959665298462, "reward_std": 0.2492750883102417, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7207992076873779, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.906114935874939, "step": 116 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.621289148308542, "calib/avg_num_step_conf": 5.53125, "calib/ece": 0.19062992125984254, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05585765392581429, "calib/mean_conf": 0.2652755905511811, "calib/mu_c": 0.29628318584070795, "calib/mu_w": 0.24042553191489366, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005511811023622047, "calib/std_conf": 0.13558172577751298, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2650.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 716.1640625, "completions/mean_terminated_length": 721.8031616210938, "completions/min_length": 0.0, "completions/min_terminated_length": 279.0, "epoch": 0.1248, "grad_norm": 0.006016943138092756, "learning_rate": 2.305555555555556e-06, "loss": 0.0153, "num_tokens": 24651495.0, "reward": 1.1388986110687256, "reward_std": 0.2641201615333557, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7242835760116577, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9445270299911499, "step": 117 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5930733267716536, "calib/avg_num_step_conf": 5.4296875, "calib/ece": 0.2245882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04120878444881887, "calib/mean_conf": 0.2840392156862745, "calib/mu_c": 0.3047244094488189, "calib/mu_w": 0.26351562500000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005294117647058823, "calib/std_conf": 0.13263453558842586, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2581.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 757.2265625, "completions/mean_terminated_length": 757.2265625, "completions/min_length": 274.0, "completions/min_terminated_length": 274.0, "epoch": 0.12586666666666665, "grad_norm": 0.0067602358758449554, "learning_rate": 2.277777777777778e-06, "loss": 0.0139, "num_tokens": 24898921.0, "reward": 1.1811814308166504, "reward_std": 0.24626702070236206, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7044574022293091, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9329982995986938, "step": 118 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.608679728987365, "calib/avg_num_step_conf": 5.1875, "calib/ece": 0.20527343750000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.049603247268509953, "calib/mean_conf": 0.2994140625, "calib/mu_c": 0.32440944881889755, "calib/mu_w": 0.2748062015503876, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004296874999999999, "calib/std_conf": 0.1394382158959519, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2098.0, "completions/max_terminated_length": 2098.0, "completions/mean_length": 749.35546875, "completions/mean_terminated_length": 752.294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.12693333333333334, "grad_norm": 0.006964806001633406, "learning_rate": 2.25e-06, "loss": 0.0232, "num_tokens": 25145388.0, "reward": 1.1866703033447266, "reward_std": 0.3203437328338623, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.712939441204071, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9379895925521851, "step": 119 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5923692307692308, "calib/avg_num_step_conf": 4.99609375, "calib/ece": 0.23207843137254905, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03647076923076914, "calib/mean_conf": 0.28635294117647064, "calib/mu_c": 0.3042307692307692, "calib/mu_w": 0.26776000000000005, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0043137254901960765, "calib/std_conf": 0.1233822092125644, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2776.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 724.1484375, "completions/mean_terminated_length": 724.1484375, "completions/min_length": 249.0, "completions/min_terminated_length": 249.0, "epoch": 0.128, "grad_norm": 0.0065616280771791935, "learning_rate": 2.222222222222222e-06, "loss": 0.0024, "num_tokens": 25387026.0, "reward": 1.19674551486969, "reward_std": 0.2652026414871216, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.7004241943359375, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9564459919929504, "step": 120 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5782581453634086, "calib/avg_num_step_conf": 5.4375, "calib/ece": 0.2056521739130435, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03354636591478699, "calib/mean_conf": 0.27403162055335967, "calib/mu_c": 0.2916666666666667, "calib/mu_w": 0.2581203007518797, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0026877470355731225, "calib/std_conf": 0.11986962197682896, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 842.78125, "completions/mean_terminated_length": 846.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 298.0, "epoch": 0.12906666666666666, "grad_norm": 0.005478292237967253, "learning_rate": 2.1944444444444445e-06, "loss": 0.037, "num_tokens": 25657402.0, "reward": 1.145277500152588, "reward_std": 0.2719191908836365, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7006878852844238, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9109839200973511, "step": 121 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7136587522101541, "calib/avg_num_step_conf": 4.96875, "calib/ece": 0.2925486274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.10433096741601411, "calib/mean_conf": 0.2878435294117647, "calib/mu_c": 0.3316216216216216, "calib/mu_w": 0.2272906542056075, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.13818226336658362, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2266.0, "completions/max_terminated_length": 2266.0, "completions/mean_length": 760.94921875, "completions/mean_terminated_length": 763.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.13013333333333332, "grad_norm": 0.008651395328342915, "learning_rate": 2.166666666666667e-06, "loss": 0.0267, "num_tokens": 25909117.0, "reward": 1.2535841464996338, "reward_std": 0.2683509588241577, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6973562240600586, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9102489948272705, "step": 122 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5751905751905751, "calib/avg_num_step_conf": 5.23046875, "calib/ece": 0.13921259842519687, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0359201159201159, "calib/mean_conf": 0.2977952755905512, "calib/mu_c": 0.318018018018018, "calib/mu_w": 0.2820979020979021, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.12826776487192332, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2402.0, "completions/max_terminated_length": 2402.0, "completions/mean_length": 804.0, "completions/mean_terminated_length": 807.1530151367188, "completions/min_length": 0.0, "completions/min_terminated_length": 280.0, "epoch": 0.1312, "grad_norm": 0.007704537827521563, "learning_rate": 2.138888888888889e-06, "loss": -0.0067, "num_tokens": 26169797.0, "reward": 1.120632290840149, "reward_std": 0.32466065883636475, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7235772609710693, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9072494506835938, "step": 123 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6204038389629815, "calib/avg_num_step_conf": 5.21484375, "calib/ece": 0.265764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04758132868004489, "calib/mean_conf": 0.2910980392156862, "calib/mu_c": 0.3121830985915493, "calib/mu_w": 0.2646017699115044, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.11255835173444453, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2084.0, "completions/max_terminated_length": 2084.0, "completions/mean_length": 762.0, "completions/mean_terminated_length": 764.98828125, "completions/min_length": 0.0, "completions/min_terminated_length": 271.0, "epoch": 0.13226666666666667, "grad_norm": 0.006237371359020472, "learning_rate": 2.1111111111111114e-06, "loss": -0.0043, "num_tokens": 26421253.0, "reward": 1.2234071493148804, "reward_std": 0.2788293957710266, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6825433373451233, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9160416126251221, "step": 124 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6500318877551021, "calib/avg_num_step_conf": 5.140625, "calib/ece": 0.14940476190476193, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06517857142857142, "calib/mean_conf": 0.2950396825396825, "calib/mu_c": 0.33124999999999993, "calib/mu_w": 0.2660714285714285, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.11745143547499669, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2554.0, "completions/max_terminated_length": 2554.0, "completions/mean_length": 784.125, "completions/mean_terminated_length": 790.2991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 269.0, "epoch": 0.13333333333333333, "grad_norm": 0.008086186833679676, "learning_rate": 2.0833333333333334e-06, "loss": 0.0272, "num_tokens": 26676365.0, "reward": 1.129145860671997, "reward_std": 0.2846543788909912, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.7306152582168579, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9147278070449829, "step": 125 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6464761086916518, "calib/avg_num_step_conf": 4.90234375, "calib/ece": 0.18035156250000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06242470710912096, "calib/mean_conf": 0.2844921875, "calib/mu_c": 0.3178991596638655, "calib/mu_w": 0.25547445255474455, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.12222440274537996, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1850.0, "completions/max_terminated_length": 1850.0, "completions/mean_length": 727.28125, "completions/mean_terminated_length": 730.1333618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 287.0, "epoch": 0.1344, "grad_norm": 0.01193835400044918, "learning_rate": 2.0555555555555555e-06, "loss": -0.0005, "num_tokens": 26917581.0, "reward": 1.14719820022583, "reward_std": 0.21822646260261536, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7257269620895386, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8826517462730408, "step": 126 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5234159779614325, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.1990234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008971533516988095, "calib/mean_conf": 0.28535156250000004, "calib/mu_c": 0.2900826446280992, "calib/mu_w": 0.2811111111111111, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005859375000000002, "calib/std_conf": 0.12010035451158584, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1643.0, "completions/max_terminated_length": 1643.0, "completions/mean_length": 695.4453125, "completions/mean_terminated_length": 698.172607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 292.0, "epoch": 0.13546666666666668, "grad_norm": 0.009412859566509724, "learning_rate": 2.027777777777778e-06, "loss": 0.028, "num_tokens": 27148855.0, "reward": 1.157740831375122, "reward_std": 0.25142019987106323, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7024316191673279, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.937037467956543, "step": 127 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6896017699115045, "calib/avg_num_step_conf": 4.51953125, "calib/ece": 0.13102766798418977, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08932996207332483, "calib/mean_conf": 0.31561264822134383, "calib/mu_c": 0.36504424778761063, "calib/mu_w": 0.2757142857142858, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.1316635735460487, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 800.5859375, "completions/mean_terminated_length": 806.8897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.13653333333333334, "grad_norm": 0.006972191855311394, "learning_rate": 2.0000000000000003e-06, "loss": -0.0152, "num_tokens": 27410037.0, "reward": 1.1384683847427368, "reward_std": 0.27915820479393005, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7535644769668579, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8858070373535156, "step": 128 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5106669111803901, "calib/avg_num_step_conf": 4.375, "calib/ece": 0.186171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015294333394461068, "calib/mean_conf": 0.325546875, "calib/mu_c": 0.3263414634146341, "calib/mu_w": 0.324812030075188, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.015625, "calib/std_conf": 0.11882437535175337, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2110.0, "completions/max_terminated_length": 2110.0, "completions/mean_length": 679.5859375, "completions/mean_terminated_length": 682.2510375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.1376, "grad_norm": 0.007675653323531151, "learning_rate": 1.9722222222222224e-06, "loss": 0.0241, "num_tokens": 27635963.0, "reward": 1.1678061485290527, "reward_std": 0.26005449891090393, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.709470272064209, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9319714307785034, "step": 129 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6098653323449469, "calib/avg_num_step_conf": 4.4453125, "calib/ece": 0.236640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04761304670126021, "calib/mean_conf": 0.323515625, "calib/mu_c": 0.34471830985915497, "calib/mu_w": 0.29710526315789476, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002734375, "calib/std_conf": 0.12224840134275532, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1929.0, "completions/max_terminated_length": 1929.0, "completions/mean_length": 682.43359375, "completions/mean_terminated_length": 685.10986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 289.0, "epoch": 0.13866666666666666, "grad_norm": 0.009352181106805801, "learning_rate": 1.944444444444445e-06, "loss": 0.0194, "num_tokens": 27865522.0, "reward": 1.2341196537017822, "reward_std": 0.2630813419818878, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7081273794174194, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9014734625816345, "step": 130 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5902677403813064, "calib/avg_num_step_conf": 4.37890625, "calib/ece": 0.07421259842519679, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0475084314130359, "calib/mean_conf": 0.32106299212598427, "calib/mu_c": 0.3522988505747126, "calib/mu_w": 0.3047904191616767, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.026377952755905473, "calib/std_conf": 0.12751120055984314, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2197.0, "completions/max_terminated_length": 2197.0, "completions/mean_length": 691.9453125, "completions/mean_terminated_length": 694.6588745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 294.0, "epoch": 0.13973333333333332, "grad_norm": 0.009569883346557617, "learning_rate": 1.916666666666667e-06, "loss": 0.0106, "num_tokens": 28098436.0, "reward": 1.0664312839508057, "reward_std": 0.18633168935775757, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.7733886241912842, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9470728635787964, "step": 131 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6649449671032405, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.23794921875000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06923353624792478, "calib/mean_conf": 0.32220703125000005, "calib/mu_c": 0.3538489208633094, "calib/mu_w": 0.2846153846153846, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008593749999999999, "calib/std_conf": 0.11967041371747456, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1993.0, "completions/max_terminated_length": 1993.0, "completions/mean_length": 726.44921875, "completions/mean_terminated_length": 729.298095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.1408, "grad_norm": 0.007241805549710989, "learning_rate": 1.888888888888889e-06, "loss": -0.0016, "num_tokens": 28339567.0, "reward": 1.2315657138824463, "reward_std": 0.30143308639526367, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7194007039070129, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9171492457389832, "step": 132 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5812566277836692, "calib/avg_num_step_conf": 4.7265625, "calib/ece": 0.11003906249999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.042606044538706145, "calib/mean_conf": 0.31183593749999994, "calib/mu_c": 0.3391304347826086, "calib/mu_w": 0.2965243902439025, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03125, "calib/std_conf": 0.12445250583453953, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1932.0, "completions/max_terminated_length": 1932.0, "completions/mean_length": 803.2265625, "completions/mean_terminated_length": 806.3765258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.14186666666666667, "grad_norm": 0.010189288295805454, "learning_rate": 1.8611111111111113e-06, "loss": 0.0004, "num_tokens": 28601105.0, "reward": 1.0813331604003906, "reward_std": 0.2606811225414276, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.7691448926925659, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9511052370071411, "step": 133 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7081441922563418, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.14008097165991906, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.09086114819759677, "calib/mean_conf": 0.3157894736842105, "calib/mu_c": 0.3672897196261682, "calib/mu_w": 0.2764285714285714, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.011336032388663979, "calib/std_conf": 0.11538390510471065, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2861.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 840.31640625, "completions/mean_terminated_length": 843.61181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.14293333333333333, "grad_norm": 0.006599487271159887, "learning_rate": 1.8333333333333333e-06, "loss": 0.0377, "num_tokens": 28874746.0, "reward": 1.1201388835906982, "reward_std": 0.2942541539669037, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.7448437213897705, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.9174302816390991, "step": 134 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6840958605664488, "calib/avg_num_step_conf": 4.8046875, "calib/ece": 0.23177165354330717, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08428571428571419, "calib/mean_conf": 0.30051181102362207, "calib/mu_c": 0.3399999999999999, "calib/mu_w": 0.2557142857142857, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0003937007874015748, "calib/std_conf": 0.12558263371977746, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2682.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 805.5234375, "completions/mean_terminated_length": 805.5234375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.144, "grad_norm": 0.0070743318647146225, "learning_rate": 1.8055555555555557e-06, "loss": 0.0624, "num_tokens": 29136408.0, "reward": 1.2166517972946167, "reward_std": 0.3128998875617981, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7107660174369812, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9419503211975098, "step": 135 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6717162032598274, "calib/avg_num_step_conf": 5.078125, "calib/ece": 0.11996062992125991, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08394183445190168, "calib/mean_conf": 0.2934251968503937, "calib/mu_c": 0.34266666666666673, "calib/mu_w": 0.25872483221476505, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.13493830726185035, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1883.0, "completions/max_terminated_length": 1883.0, "completions/mean_length": 744.6328125, "completions/mean_terminated_length": 747.552978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 268.0, "epoch": 0.14506666666666668, "grad_norm": 0.007300595287233591, "learning_rate": 1.777777777777778e-06, "loss": 0.0186, "num_tokens": 29385090.0, "reward": 1.1226438283920288, "reward_std": 0.24612930417060852, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7596331834793091, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9338091015815735, "step": 136 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.656496062992126, "calib/avg_num_step_conf": 4.9296875, "calib/ece": 0.21733333333333332, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07552965059055111, "calib/mean_conf": 0.29011764705882354, "calib/mu_c": 0.327734375, "calib/mu_w": 0.2522047244094489, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002745098039215686, "calib/std_conf": 0.13646081777059865, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1667.0, "completions/max_terminated_length": 1667.0, "completions/mean_length": 747.72265625, "completions/mean_terminated_length": 750.6549682617188, "completions/min_length": 0.0, "completions/min_terminated_length": 276.0, "epoch": 0.14613333333333334, "grad_norm": 0.007695949170738459, "learning_rate": 1.75e-06, "loss": -0.0086, "num_tokens": 29633059.0, "reward": 1.1950247287750244, "reward_std": 0.27193596959114075, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7214398384094238, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9387810826301575, "step": 137 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6731679960119641, "calib/avg_num_step_conf": 4.703125, "calib/ece": 0.2249606299212598, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08588858424725809, "calib/mean_conf": 0.32149606299212596, "calib/mu_c": 0.3613970588235294, "calib/mu_w": 0.2755084745762713, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005511811023622047, "calib/std_conf": 0.13291450535036223, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2637.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 763.8671875, "completions/mean_terminated_length": 763.8671875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.1472, "grad_norm": 0.007103401236236095, "learning_rate": 1.7222222222222224e-06, "loss": 0.0189, "num_tokens": 29882513.0, "reward": 1.227642297744751, "reward_std": 0.27762991189956665, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7248413562774658, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9233865737915039, "step": 138 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6292919799498746, "calib/avg_num_step_conf": 4.7109375, "calib/ece": 0.22401574803149604, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06520050125313276, "calib/mean_conf": 0.32716535433070865, "calib/mu_c": 0.3564285714285714, "calib/mu_w": 0.2912280701754386, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.12708976081342904, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 726.91796875, "completions/mean_terminated_length": 729.7686767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.14826666666666666, "grad_norm": 0.008045943453907967, "learning_rate": 1.6944444444444446e-06, "loss": 0.0131, "num_tokens": 30121268.0, "reward": 1.2315952777862549, "reward_std": 0.2970055341720581, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7129297256469727, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9161471128463745, "step": 139 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6031959953792838, "calib/avg_num_step_conf": 4.81640625, "calib/ece": 0.25620553359683795, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.050839430111667305, "calib/mean_conf": 0.3248221343873518, "calib/mu_c": 0.3461224489795918, "calib/mu_w": 0.2952830188679245, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.1297994347638294, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 751.359375, "completions/mean_terminated_length": 754.305908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 282.0, "epoch": 0.14933333333333335, "grad_norm": 0.007783446926623583, "learning_rate": 1.6666666666666667e-06, "loss": 0.0019, "num_tokens": 30368200.0, "reward": 1.2387367486953735, "reward_std": 0.26263174414634705, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6884863376617432, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8873493671417236, "step": 140 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6780106349702847, "calib/avg_num_step_conf": 4.375, "calib/ece": 0.18976377952755905, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.09311854863934937, "calib/mean_conf": 0.35748031496062993, "calib/mu_c": 0.39964028776978416, "calib/mu_w": 0.3065217391304348, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.12589502253510024, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2116.0, "completions/max_terminated_length": 2116.0, "completions/mean_length": 778.265625, "completions/mean_terminated_length": 781.3176879882812, "completions/min_length": 0.0, "completions/min_terminated_length": 287.0, "epoch": 0.1504, "grad_norm": 0.00781600084155798, "learning_rate": 1.638888888888889e-06, "loss": -0.0249, "num_tokens": 30624100.0, "reward": 1.2407430410385132, "reward_std": 0.2818630337715149, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7406835556030273, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.912854790687561, "step": 141 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6918085969180859, "calib/avg_num_step_conf": 4.8125, "calib/ece": 0.12578740157480317, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.10149416682263385, "calib/mean_conf": 0.33799212598425193, "calib/mu_c": 0.39273504273504267, "calib/mu_w": 0.2912408759124088, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0015748031496063033, "calib/std_conf": 0.13538914435965987, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 780.109375, "completions/mean_terminated_length": 783.168701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 262.0, "epoch": 0.15146666666666667, "grad_norm": 0.006971573457121849, "learning_rate": 1.6111111111111113e-06, "loss": -0.0166, "num_tokens": 30878536.0, "reward": 1.1672776937484741, "reward_std": 0.22784468531608582, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7551074028015137, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9370207786560059, "step": 142 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6846590909090909, "calib/avg_num_step_conf": 4.41015625, "calib/ece": 0.1681640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08969941348973592, "calib/mean_conf": 0.3474609375, "calib/mu_c": 0.3909090909090909, "calib/mu_w": 0.30120967741935495, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.12725849691325564, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1880.0, "completions/max_terminated_length": 1880.0, "completions/mean_length": 707.3203125, "completions/mean_terminated_length": 710.0941772460938, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 0.15253333333333333, "grad_norm": 0.009549976326525211, "learning_rate": 1.5833333333333333e-06, "loss": -0.0363, "num_tokens": 31116514.0, "reward": 1.2209570407867432, "reward_std": 0.1799650490283966, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.746826171875, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9292377233505249, "step": 143 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.618385093167702, "calib/avg_num_step_conf": 4.265625, "calib/ece": 0.19478431372549018, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06617701863354036, "calib/mean_conf": 0.35894117647058826, "calib/mu_c": 0.38878571428571423, "calib/mu_w": 0.32260869565217387, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.002352941176470588, "calib/std_conf": 0.12579183111429176, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2107.0, "completions/max_terminated_length": 2107.0, "completions/mean_length": 731.86328125, "completions/mean_terminated_length": 734.7333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.1536, "grad_norm": 0.009908774867653847, "learning_rate": 1.5555555555555558e-06, "loss": -0.0001, "num_tokens": 31357567.0, "reward": 1.2265137434005737, "reward_std": 0.2905798554420471, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.718519926071167, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8893276453018188, "step": 144 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6308209618068772, "calib/avg_num_step_conf": 4.32421875, "calib/ece": 0.20098814229249007, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06806877299835035, "calib/mean_conf": 0.3602766798418972, "calib/mu_c": 0.39014084507042246, "calib/mu_w": 0.3220720720720721, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.12750696535900893, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 731.765625, "completions/mean_terminated_length": 737.5275268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 274.0, "epoch": 0.15466666666666667, "grad_norm": 0.00937727466225624, "learning_rate": 1.527777777777778e-06, "loss": 0.0053, "num_tokens": 31597171.0, "reward": 1.247790813446045, "reward_std": 0.30426278710365295, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7220605611801147, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9329795837402344, "step": 145 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6259259259259259, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.07519841269841271, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07216049382716044, "calib/mean_conf": 0.3402777777777778, "calib/mu_c": 0.38666666666666666, "calib/mu_w": 0.3145061728395062, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.029166666666666667, "calib/std_conf": 0.12838558855043924, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2115.0, "completions/max_terminated_length": 2115.0, "completions/mean_length": 740.359375, "completions/mean_terminated_length": 743.2628173828125, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.15573333333333333, "grad_norm": 0.009205873124301434, "learning_rate": 1.5e-06, "loss": -0.0267, "num_tokens": 31843487.0, "reward": 1.0692135095596313, "reward_std": 0.23088780045509338, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.7716699242591858, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9366390109062195, "step": 146 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6631589235681241, "calib/avg_num_step_conf": 4.6953125, "calib/ece": 0.08738095238095239, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.091305141069916, "calib/mean_conf": 0.32611111111111113, "calib/mu_c": 0.38009708737864084, "calib/mu_w": 0.28879194630872485, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0023809523809523807, "calib/std_conf": 0.14408425787922394, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2246.0, "completions/max_terminated_length": 2246.0, "completions/mean_length": 758.9921875, "completions/mean_terminated_length": 764.968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.1568, "grad_norm": 0.013621012680232525, "learning_rate": 1.4722222222222225e-06, "loss": 0.015, "num_tokens": 32091037.0, "reward": 1.111022710800171, "reward_std": 0.2638784348964691, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.7594866752624512, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9235550165176392, "step": 147 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7578185577668648, "calib/avg_num_step_conf": 4.375, "calib/ece": 0.2285714285714286, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1365210648746446, "calib/mean_conf": 0.3507936507936508, "calib/mu_c": 0.40821917808219177, "calib/mu_w": 0.27169811320754716, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.13779501208865644, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2877.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 721.51953125, "completions/mean_terminated_length": 730.0751342773438, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.15786666666666666, "grad_norm": 0.012164588086307049, "learning_rate": 1.4444444444444445e-06, "loss": 0.0066, "num_tokens": 32330426.0, "reward": 1.2605688571929932, "reward_std": 0.27147233486175537, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7398632764816284, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8875489234924316, "step": 148 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.708615801799581, "calib/avg_num_step_conf": 4.74609375, "calib/ece": 0.16666666666666666, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.10054850240354973, "calib/mean_conf": 0.35490196078431374, "calib/mu_c": 0.4030075187969924, "calib/mu_w": 0.3024590163934427, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.121290003290115, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1916.0, "completions/max_terminated_length": 1916.0, "completions/mean_length": 774.609375, "completions/mean_terminated_length": 777.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 277.0, "epoch": 0.15893333333333334, "grad_norm": 0.011835869401693344, "learning_rate": 1.4166666666666667e-06, "loss": 0.0054, "num_tokens": 32582750.0, "reward": 1.22483229637146, "reward_std": 0.2820708155632019, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7489452958106995, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9123760461807251, "step": 149 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6881764153894276, "calib/avg_num_step_conf": 4.2734375, "calib/ece": 0.11397637795275595, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.10154207069127302, "calib/mean_conf": 0.3387795275590551, "calib/mu_c": 0.3943478260869565, "calib/mu_w": 0.2928057553956835, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.1381902382336896, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2055.0, "completions/max_terminated_length": 2055.0, "completions/mean_length": 642.6328125, "completions/mean_terminated_length": 645.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.16, "grad_norm": 0.012791814282536507, "learning_rate": 1.3888888888888892e-06, "loss": -0.0426, "num_tokens": 32801792.0, "reward": 1.1504539251327515, "reward_std": 0.3026912808418274, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.7619433403015137, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8857413530349731, "step": 150 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.650921052631579, "calib/avg_num_step_conf": 4.4296875, "calib/ece": 0.08223529411764705, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.09232565789473673, "calib/mean_conf": 0.3473333333333333, "calib/mu_c": 0.40526315789473677, "calib/mu_w": 0.31293750000000004, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.028509803921568627, "calib/std_conf": 0.1384166297765294, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 749.734375, "completions/mean_terminated_length": 752.674560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.16106666666666666, "grad_norm": 0.010117570869624615, "learning_rate": 1.3611111111111112e-06, "loss": 0.0284, "num_tokens": 33050316.0, "reward": 1.0854027271270752, "reward_std": 0.28477585315704346, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.7755120992660522, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9124618768692017, "step": 151 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6470532856335907, "calib/avg_num_step_conf": 4.53125, "calib/ece": 0.11559055118110238, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07615263917655185, "calib/mean_conf": 0.33763779527559057, "calib/mu_c": 0.3799115044247788, "calib/mu_w": 0.30375886524822693, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00417322834645669, "calib/std_conf": 0.13934580953336656, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 748.90625, "completions/mean_terminated_length": 748.90625, "completions/min_length": 240.0, "completions/min_terminated_length": 240.0, "epoch": 0.16213333333333332, "grad_norm": 0.007901887409389019, "learning_rate": 1.3333333333333334e-06, "loss": 0.0672, "num_tokens": 33296996.0, "reward": 1.1493442058563232, "reward_std": 0.26041844487190247, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.750516414642334, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9354065656661987, "step": 152 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6494514975964502, "calib/avg_num_step_conf": 4.73828125, "calib/ece": 0.16725490196078435, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07670713669419454, "calib/mean_conf": 0.36254901960784314, "calib/mu_c": 0.3992481203007519, "calib/mu_w": 0.32254098360655736, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004117647058823533, "calib/std_conf": 0.1352534749980156, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2115.0, "completions/max_terminated_length": 2115.0, "completions/mean_length": 755.9765625, "completions/mean_terminated_length": 758.9412231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.1632, "grad_norm": 0.00855367723852396, "learning_rate": 1.3055555555555556e-06, "loss": -0.0044, "num_tokens": 33547414.0, "reward": 1.226203203201294, "reward_std": 0.2390766739845276, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7422558069229126, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.943738579750061, "step": 153 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7154533844189017, "calib/avg_num_step_conf": 4.9140625, "calib/ece": 0.11565737051792832, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.11159131545338435, "calib/mean_conf": 0.3554980079681275, "calib/mu_c": 0.41551724137931034, "calib/mu_w": 0.303925925925926, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004501992031872515, "calib/std_conf": 0.1399745672545678, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2152.0, "completions/max_terminated_length": 2152.0, "completions/mean_length": 687.1875, "completions/mean_terminated_length": 689.8823852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 0.16426666666666667, "grad_norm": 0.0068125915713608265, "learning_rate": 1.2777777777777779e-06, "loss": -0.0034, "num_tokens": 33777342.0, "reward": 1.171915888786316, "reward_std": 0.2810779809951782, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7607855200767517, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9145296812057495, "step": 154 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.668653340668745, "calib/avg_num_step_conf": 4.46875, "calib/ece": 0.14265625000000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.09482608961427957, "calib/mean_conf": 0.34992187500000005, "calib/mu_c": 0.3991869918699187, "calib/mu_w": 0.3043609022556391, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0060546875, "calib/std_conf": 0.13898907383850131, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1938.0, "completions/max_terminated_length": 1938.0, "completions/mean_length": 658.85546875, "completions/mean_terminated_length": 661.4392700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.16533333333333333, "grad_norm": 0.008058815263211727, "learning_rate": 1.25e-06, "loss": -0.0134, "num_tokens": 34002793.0, "reward": 1.1980299949645996, "reward_std": 0.27268341183662415, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7613617181777954, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9475213289260864, "step": 155 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6333750390991556, "calib/avg_num_step_conf": 4.578125, "calib/ece": 0.17881889763779532, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07418830153268685, "calib/mean_conf": 0.36842519685039377, "calib/mu_c": 0.4020143884892086, "calib/mu_w": 0.32782608695652177, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.13346045565404926, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2121.0, "completions/max_terminated_length": 2121.0, "completions/mean_length": 699.34375, "completions/mean_terminated_length": 702.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1664, "grad_norm": 0.006179318763315678, "learning_rate": 1.2222222222222223e-06, "loss": 0.0082, "num_tokens": 34236153.0, "reward": 1.245202660560608, "reward_std": 0.25565141439437866, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7334320545196533, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9451965093612671, "step": 156 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6549322527542104, "calib/avg_num_step_conf": 4.75390625, "calib/ece": 0.19882352941176476, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06824743573508923, "calib/mean_conf": 0.39176470588235296, "calib/mu_c": 0.4201342281879194, "calib/mu_w": 0.35188679245283017, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003137254901960784, "calib/std_conf": 0.1288365111195678, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1639.0, "completions/max_terminated_length": 1639.0, "completions/mean_length": 657.34375, "completions/mean_terminated_length": 659.921630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.16746666666666668, "grad_norm": 0.011216006241738796, "learning_rate": 1.1944444444444446e-06, "loss": -0.0069, "num_tokens": 34457729.0, "reward": 1.2692151069641113, "reward_std": 0.2984997630119324, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7337108850479126, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8828759789466858, "step": 157 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6398305084745762, "calib/avg_num_step_conf": 4.53515625, "calib/ece": 0.1539525691699604, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05395480225988697, "calib/mean_conf": 0.39150197628458505, "calib/mu_c": 0.4166666666666667, "calib/mu_w": 0.3627118644067797, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00592885375494071, "calib/std_conf": 0.11531413100883842, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 664.3203125, "completions/mean_terminated_length": 664.3203125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.16853333333333334, "grad_norm": 0.006714844144880772, "learning_rate": 1.1666666666666668e-06, "loss": 0.0227, "num_tokens": 34682603.0, "reward": 1.2171711921691895, "reward_std": 0.31195053458213806, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7324901819229126, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9005794525146484, "step": 158 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6485275689223058, "calib/avg_num_step_conf": 4.31640625, "calib/ece": 0.1614173228346457, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07828947368421046, "calib/mean_conf": 0.4023622047244095, "calib/mu_c": 0.4375, "calib/mu_w": 0.35921052631578954, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006299212598425195, "calib/std_conf": 0.13271220827437258, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2811.0, "completions/max_terminated_length": 2811.0, "completions/mean_length": 679.140625, "completions/mean_terminated_length": 679.140625, "completions/min_length": 308.0, "completions/min_terminated_length": 308.0, "epoch": 0.1696, "grad_norm": 0.007278375793248415, "learning_rate": 1.138888888888889e-06, "loss": 0.0407, "num_tokens": 34910815.0, "reward": 1.2508426904678345, "reward_std": 0.31499069929122925, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7419726848602295, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9366129636764526, "step": 159 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7393737424547284, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.11145669291338584, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.13917756539235404, "calib/mean_conf": 0.3712992125984252, "calib/mu_c": 0.4491071428571428, "calib/mu_w": 0.3099295774647888, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02090551181102362, "calib/std_conf": 0.15462516602378776, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 686.33984375, "completions/mean_terminated_length": 691.7440795898438, "completions/min_length": 0.0, "completions/min_terminated_length": 249.0, "epoch": 0.17066666666666666, "grad_norm": 0.007836407981812954, "learning_rate": 1.111111111111111e-06, "loss": -0.0089, "num_tokens": 35140926.0, "reward": 1.1535251140594482, "reward_std": 0.30200785398483276, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.7842183113098145, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9003516435623169, "step": 160 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7232659932659933, "calib/avg_num_step_conf": 4.87109375, "calib/ece": 0.24921568627450985, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.12535353535353527, "calib/mean_conf": 0.4033333333333333, "calib/mu_c": 0.4475757575757575, "calib/mu_w": 0.32222222222222224, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002745098039215686, "calib/std_conf": 0.15592439931150479, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2544.0, "completions/max_terminated_length": 2544.0, "completions/mean_length": 677.19921875, "completions/mean_terminated_length": 677.19921875, "completions/min_length": 210.0, "completions/min_terminated_length": 210.0, "epoch": 0.17173333333333332, "grad_norm": 0.0068269157782197, "learning_rate": 1.0833333333333335e-06, "loss": 0.0355, "num_tokens": 35367777.0, "reward": 1.3364789485931396, "reward_std": 0.27876824140548706, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7393261790275574, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8922635912895203, "step": 161 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6952734489855924, "calib/avg_num_step_conf": 4.3359375, "calib/ece": 0.28647058823529414, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.11860114672155236, "calib/mean_conf": 0.4154901960784314, "calib/mu_c": 0.4508379888268156, "calib/mu_w": 0.3322368421052632, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.1512213508824288, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1442.0, "completions/max_terminated_length": 1442.0, "completions/mean_length": 628.0078125, "completions/mean_terminated_length": 630.4706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.1728, "grad_norm": 0.008862740360200405, "learning_rate": 1.0555555555555557e-06, "loss": -0.002, "num_tokens": 35582259.0, "reward": 1.3880972862243652, "reward_std": 0.2838217616081238, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7326074242591858, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8918615579605103, "step": 162 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7099097368926446, "calib/avg_num_step_conf": 5.32421875, "calib/ece": 0.11000000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.12084373599641501, "calib/mean_conf": 0.3996, "calib/mu_c": 0.45905511811023625, "calib/mu_w": 0.33821138211382124, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0007999999999999998, "calib/std_conf": 0.1613686462730601, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2617.0, "completions/max_terminated_length": 2617.0, "completions/mean_length": 718.16796875, "completions/mean_terminated_length": 720.984375, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.17386666666666667, "grad_norm": 0.006122746970504522, "learning_rate": 1.0277777777777777e-06, "loss": 0.0315, "num_tokens": 35820510.0, "reward": 1.2001893520355225, "reward_std": 0.3164390027523041, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7545703053474426, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9166164398193359, "step": 163 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7324094488188977, "calib/avg_num_step_conf": 4.8203125, "calib/ece": 0.12003968253968257, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.14871496062992123, "calib/mean_conf": 0.393452380952381, "calib/mu_c": 0.46840000000000004, "calib/mu_w": 0.3196850393700788, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.008730158730158734, "calib/std_conf": 0.1712662508634064, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 777.6171875, "completions/mean_terminated_length": 780.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.17493333333333333, "grad_norm": 0.0072492267936468124, "learning_rate": 1.0000000000000002e-06, "loss": 0.0045, "num_tokens": 36075284.0, "reward": 1.1992881298065186, "reward_std": 0.30885034799575806, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7693262100219727, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9131876230239868, "step": 164 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5890151515151516, "calib/avg_num_step_conf": 4.99609375, "calib/ece": 0.07893700787401575, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06381313131313132, "calib/mean_conf": 0.4179133858267717, "calib/mu_c": 0.4540909090909091, "calib/mu_w": 0.3902777777777778, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03188976377952755, "calib/std_conf": 0.16732017135444094, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2420.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 740.99609375, "completions/mean_terminated_length": 743.9019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 244.0, "epoch": 0.176, "grad_norm": 0.006283737253397703, "learning_rate": 9.722222222222224e-07, "loss": 0.0443, "num_tokens": 36320123.0, "reward": 1.1393153667449951, "reward_std": 0.3001851737499237, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7516698837280273, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9382967352867126, "step": 165 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7321003134796238, "calib/avg_num_step_conf": 4.9453125, "calib/ece": 0.15313725490196073, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1615987460815047, "calib/mean_conf": 0.4209803921568627, "calib/mu_c": 0.4906896551724138, "calib/mu_w": 0.3290909090909091, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002745098039215686, "calib/std_conf": 0.19783390638991402, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 769.99609375, "completions/mean_terminated_length": 769.99609375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.17706666666666668, "grad_norm": 0.0065361512824893, "learning_rate": 9.444444444444445e-07, "loss": -0.005, "num_tokens": 36572994.0, "reward": 1.285456657409668, "reward_std": 0.2910774350166321, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7700293064117432, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.937705397605896, "step": 166 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7274933510638298, "calib/avg_num_step_conf": 5.4140625, "calib/ece": 0.16181102362204725, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1665957446808511, "calib/mean_conf": 0.47834645669291337, "calib/mu_c": 0.54, "calib/mu_w": 0.37340425531914895, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005118110236220475, "calib/std_conf": 0.1908425734519834, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2744.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 705.22265625, "completions/mean_terminated_length": 707.98828125, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.17813333333333334, "grad_norm": 0.006597655359655619, "learning_rate": 9.166666666666666e-07, "loss": 0.0658, "num_tokens": 36808707.0, "reward": 1.3329412937164307, "reward_std": 0.2636227607727051, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7790234088897705, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8768430948257446, "step": 167 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7365968095089147, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.072244094488189, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.18941194870190792, "calib/mean_conf": 0.47539370078740156, "calib/mu_c": 0.5611510791366906, "calib/mu_w": 0.3717391304347827, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.00019685039370078756, "calib/std_conf": 0.21914930938252944, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2074.0, "completions/max_terminated_length": 2074.0, "completions/mean_length": 747.875, "completions/mean_terminated_length": 750.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 269.0, "epoch": 0.1792, "grad_norm": 0.006707758642733097, "learning_rate": 8.88888888888889e-07, "loss": 0.0183, "num_tokens": 37054403.0, "reward": 1.2594422101974487, "reward_std": 0.31842613220214844, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7792090177536011, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9137257933616638, "step": 168 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7206911636045494, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.14940711462450587, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1576115485564304, "calib/mean_conf": 0.4624505928853755, "calib/mu_c": 0.5409448818897636, "calib/mu_w": 0.38333333333333325, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.05494071146245057, "calib/std_conf": 0.2055387275127333, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2765.0, "completions/max_terminated_length": 2765.0, "completions/mean_length": 686.23828125, "completions/mean_terminated_length": 691.6417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.18026666666666666, "grad_norm": 0.0067485845647752285, "learning_rate": 8.611111111111112e-07, "loss": 0.016, "num_tokens": 37283832.0, "reward": 1.2103452682495117, "reward_std": 0.3088925778865814, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7721386551856995, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.918978750705719, "step": 169 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7272755708476698, "calib/avg_num_step_conf": 4.76171875, "calib/ece": 0.08267716535433067, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.17380043791054095, "calib/mean_conf": 0.4968503937007875, "calib/mu_c": 0.5755395683453236, "calib/mu_w": 0.40173913043478265, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.016141732283464535, "calib/std_conf": 0.2194972809811382, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2175.0, "completions/max_terminated_length": 2175.0, "completions/mean_length": 732.2421875, "completions/mean_terminated_length": 735.11376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 261.0, "epoch": 0.18133333333333335, "grad_norm": 0.00853640865534544, "learning_rate": 8.333333333333333e-07, "loss": 0.0037, "num_tokens": 37525006.0, "reward": 1.2708096504211426, "reward_std": 0.32346639037132263, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7814843654632568, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9515193700790405, "step": 170 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7047496790757382, "calib/avg_num_step_conf": 5.05078125, "calib/ece": 0.05898437499999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.16773030136316391, "calib/mean_conf": 0.513671875, "calib/mu_c": 0.6008130081300812, "calib/mu_w": 0.4330827067669173, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04609374999999992, "calib/std_conf": 0.2147140943999354, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1751.0, "completions/max_terminated_length": 1751.0, "completions/mean_length": 688.6953125, "completions/mean_terminated_length": 691.3961181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.1824, "grad_norm": 0.0080631198361516, "learning_rate": 8.055555555555557e-07, "loss": 0.0058, "num_tokens": 37757776.0, "reward": 1.2102100849151611, "reward_std": 0.3139841556549072, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.786914050579071, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9451368451118469, "step": 171 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6152890983188997, "calib/avg_num_step_conf": 4.3359375, "calib/ece": 0.138470588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.10397733061640335, "calib/mean_conf": 0.5336862745098038, "calib/mu_c": 0.5760927152317881, "calib/mu_w": 0.4721153846153847, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03999999999999993, "calib/std_conf": 0.21907770044260272, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1948.0, "completions/max_terminated_length": 1948.0, "completions/mean_length": 701.69140625, "completions/mean_terminated_length": 704.4431762695312, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.18346666666666667, "grad_norm": 0.00572752533480525, "learning_rate": 7.777777777777779e-07, "loss": 0.0024, "num_tokens": 37990329.0, "reward": 1.3001487255096436, "reward_std": 0.32377946376800537, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7514136433601379, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9415172338485718, "step": 172 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6844036697247706, "calib/avg_num_step_conf": 4.6328125, "calib/ece": 0.08795275590551173, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.16594115786143604, "calib/mean_conf": 0.5391338582677165, "calib/mu_c": 0.6103448275862068, "calib/mu_w": 0.44440366972477074, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.028110236220472373, "calib/std_conf": 0.24909291698995098, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2861.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 742.8671875, "completions/mean_terminated_length": 745.7804565429688, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.18453333333333333, "grad_norm": 0.007476532366126776, "learning_rate": 7.5e-07, "loss": 0.0116, "num_tokens": 38233231.0, "reward": 1.255610704421997, "reward_std": 0.3569577932357788, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7596132755279541, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8438410758972168, "step": 173 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6579028670291646, "calib/avg_num_step_conf": 4.84375, "calib/ece": 0.1231372549019607, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1495798319327729, "calib/mean_conf": 0.5050980392156863, "calib/mu_c": 0.5848739495798317, "calib/mu_w": 0.43529411764705883, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08078431372549011, "calib/std_conf": 0.2633373483115587, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2183.0, "completions/max_terminated_length": 2183.0, "completions/mean_length": 760.5703125, "completions/mean_terminated_length": 763.552978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.1856, "grad_norm": 0.008986209519207478, "learning_rate": 7.222222222222222e-07, "loss": -0.0014, "num_tokens": 38481737.0, "reward": 1.162156581878662, "reward_std": 0.35894882678985596, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7517968416213989, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8872202634811401, "step": 174 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7116161616161616, "calib/avg_num_step_conf": 4.625, "calib/ece": 0.1545098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.19303030303030305, "calib/mean_conf": 0.49176470588235294, "calib/mu_c": 0.6166666666666667, "calib/mu_w": 0.42363636363636364, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1466666666666666, "calib/std_conf": 0.2593559586054062, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2195.0, "completions/max_terminated_length": 2195.0, "completions/mean_length": 713.25390625, "completions/mean_terminated_length": 716.051025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.18666666666666668, "grad_norm": 0.00823708064854145, "learning_rate": 6.944444444444446e-07, "loss": 0.0121, "num_tokens": 38719722.0, "reward": 1.0632379055023193, "reward_std": 0.2859160304069519, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.7635546922683716, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9242798089981079, "step": 175 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6838009163371619, "calib/avg_num_step_conf": 4.75, "calib/ece": 0.11338582677165346, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.17504550304399663, "calib/mean_conf": 0.5228346456692914, "calib/mu_c": 0.600709219858156, "calib/mu_w": 0.4256637168141593, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04055118110236213, "calib/std_conf": 0.26298976848394057, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2802.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 722.8046875, "completions/mean_terminated_length": 725.6392822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.18773333333333334, "grad_norm": 0.0072695789858698845, "learning_rate": 6.666666666666667e-07, "loss": -0.0148, "num_tokens": 38958392.0, "reward": 1.2607662677764893, "reward_std": 0.33970561623573303, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7595312595367432, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9255650043487549, "step": 176 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7230902777777777, "calib/avg_num_step_conf": 4.66015625, "calib/ece": 0.10629921259842512, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.20225694444444448, "calib/mean_conf": 0.5574803149606299, "calib/mu_c": 0.6578125, "calib/mu_w": 0.45555555555555555, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0799212598425196, "calib/std_conf": 0.24973046416208308, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2318.0, "completions/max_terminated_length": 2318.0, "completions/mean_length": 681.96875, "completions/mean_terminated_length": 687.3385620117188, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.1888, "grad_norm": 0.009458135813474655, "learning_rate": 6.388888888888889e-07, "loss": -0.0117, "num_tokens": 39186376.0, "reward": 1.2171062231063843, "reward_std": 0.31858891248703003, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7768359184265137, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9194402098655701, "step": 177 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6539864185110664, "calib/avg_num_step_conf": 4.875, "calib/ece": 0.11259842519685043, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.14769869215291742, "calib/mean_conf": 0.5834645669291338, "calib/mu_c": 0.6485915492957746, "calib/mu_w": 0.5008928571428571, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06850393700787406, "calib/std_conf": 0.2376935695311634, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 695.7109375, "completions/mean_terminated_length": 695.7109375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.18986666666666666, "grad_norm": 0.009171919897198677, "learning_rate": 6.111111111111112e-07, "loss": 0.0427, "num_tokens": 39420118.0, "reward": 1.2672481536865234, "reward_std": 0.3861093521118164, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7632031440734863, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9269611239433289, "step": 178 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7712779156327544, "calib/avg_num_step_conf": 4.8671875, "calib/ece": 0.07637795275590548, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.24673697270471462, "calib/mean_conf": 0.5803149606299212, "calib/mu_c": 0.7007692307692307, "calib/mu_w": 0.4540322580645161, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07244094488188975, "calib/std_conf": 0.24367248255097973, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2227.0, "completions/max_terminated_length": 2227.0, "completions/mean_length": 730.51171875, "completions/mean_terminated_length": 733.3765258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.19093333333333334, "grad_norm": 0.007313928566873074, "learning_rate": 5.833333333333334e-07, "loss": 0.0005, "num_tokens": 39662961.0, "reward": 1.2360790967941284, "reward_std": 0.3437637686729431, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.8030468225479126, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.910097599029541, "step": 179 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7613728045677403, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.07653543307086605, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.25783156457518763, "calib/mean_conf": 0.5659842519685039, "calib/mu_c": 0.6908396946564884, "calib/mu_w": 0.4330081300813008, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06338582677165346, "calib/std_conf": 0.2688536757505799, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2112.0, "completions/max_terminated_length": 2112.0, "completions/mean_length": 749.921875, "completions/mean_terminated_length": 752.86279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.192, "grad_norm": 0.008829567581415176, "learning_rate": 5.555555555555555e-07, "loss": 0.0134, "num_tokens": 39908365.0, "reward": 1.2370309829711914, "reward_std": 0.32971593737602234, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7979468703269958, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9084804058074951, "step": 180 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6950911554570092, "calib/avg_num_step_conf": 4.5546875, "calib/ece": 0.1006666666666666, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.17834996304508488, "calib/mean_conf": 0.602078431372549, "calib/mu_c": 0.6881060606060605, "calib/mu_w": 0.5097560975609756, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09254901960784306, "calib/std_conf": 0.23888465040990176, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1437.0, "completions/max_terminated_length": 1437.0, "completions/mean_length": 650.80859375, "completions/mean_terminated_length": 653.36083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.19306666666666666, "grad_norm": 0.009881153702735901, "learning_rate": 5.277777777777779e-07, "loss": 0.012, "num_tokens": 40130804.0, "reward": 1.2393596172332764, "reward_std": 0.3540980815887451, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7721527218818665, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9521951079368591, "step": 181 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6462646264626463, "calib/avg_num_step_conf": 4.82421875, "calib/ece": 0.10745098039215678, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1442908576571943, "calib/mean_conf": 0.6188235294117648, "calib/mu_c": 0.6759740259740259, "calib/mu_w": 0.5316831683168316, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06117647058823522, "calib/std_conf": 0.24277518898673164, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2588.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 716.609375, "completions/mean_terminated_length": 716.609375, "completions/min_length": 253.0, "completions/min_terminated_length": 253.0, "epoch": 0.19413333333333332, "grad_norm": 0.00817347876727581, "learning_rate": 5.000000000000001e-07, "loss": 0.022, "num_tokens": 40369984.0, "reward": 1.3225343227386475, "reward_std": 0.344273179769516, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7676562070846558, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9501374959945679, "step": 182 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6737820512820513, "calib/avg_num_step_conf": 4.9609375, "calib/ece": 0.09499999999999983, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.16467307692307664, "calib/mean_conf": 0.5965748031496062, "calib/mu_c": 0.6639999999999998, "calib/mu_w": 0.49932692307692317, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05051181102362188, "calib/std_conf": 0.24995999741958835, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1862.0, "completions/max_terminated_length": 1862.0, "completions/mean_length": 717.5078125, "completions/mean_terminated_length": 720.3215942382812, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.1952, "grad_norm": 0.007916688919067383, "learning_rate": 4.7222222222222226e-07, "loss": -0.0172, "num_tokens": 40609914.0, "reward": 1.2971291542053223, "reward_std": 0.3649140000343323, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7657812237739563, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9178915023803711, "step": 183 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6673937429889069, "calib/avg_num_step_conf": 5.0390625, "calib/ece": 0.12509803921568619, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.17651751215256128, "calib/mean_conf": 0.6443137254901962, "calib/mu_c": 0.7225352112676054, "calib/mu_w": 0.5460176991150442, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.10627450980392147, "calib/std_conf": 0.23355106005914036, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2632.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 705.1484375, "completions/mean_terminated_length": 707.9137573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 261.0, "epoch": 0.19626666666666667, "grad_norm": 0.0070179542526602745, "learning_rate": 4.444444444444445e-07, "loss": -0.0094, "num_tokens": 40845280.0, "reward": 1.2693936824798584, "reward_std": 0.41129761934280396, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7751171588897705, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9101529717445374, "step": 184 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6951195673421516, "calib/avg_num_step_conf": 4.73046875, "calib/ece": 0.07976190476190462, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.21095328077148623, "calib/mean_conf": 0.6305555555555555, "calib/mu_c": 0.7167785234899328, "calib/mu_w": 0.5058252427184465, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0595238095238094, "calib/std_conf": 0.2568870592289431, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2674.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 706.44921875, "completions/mean_terminated_length": 712.0117797851562, "completions/min_length": 0.0, "completions/min_terminated_length": 307.0, "epoch": 0.19733333333333333, "grad_norm": 0.009228321723639965, "learning_rate": 4.1666666666666667e-07, "loss": 0.0099, "num_tokens": 41082619.0, "reward": 1.2966680526733398, "reward_std": 0.3201305866241455, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7803710699081421, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9040552377700806, "step": 185 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7155020736458464, "calib/avg_num_step_conf": 4.453125, "calib/ece": 0.10078431372549013, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.23023752670604486, "calib/mean_conf": 0.6098039215686275, "calib/mu_c": 0.7082191780821917, "calib/mu_w": 0.47798165137614684, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06901960784313718, "calib/std_conf": 0.2630551287637917, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2267.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 712.1640625, "completions/mean_terminated_length": 714.9569091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1984, "grad_norm": 0.008529865182936192, "learning_rate": 3.8888888888888895e-07, "loss": 0.0151, "num_tokens": 41319541.0, "reward": 1.295252799987793, "reward_std": 0.276530921459198, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7942577600479126, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.912807822227478, "step": 186 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6277372262773723, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.15464566929133847, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.14027325472580954, "calib/mean_conf": 0.6207874015748032, "calib/mu_c": 0.6854014598540146, "calib/mu_w": 0.545128205128205, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.11803149606299203, "calib/std_conf": 0.2515749017288139, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 759.5625, "completions/mean_terminated_length": 765.5432739257812, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.19946666666666665, "grad_norm": 0.007863198406994343, "learning_rate": 3.611111111111111e-07, "loss": 0.008, "num_tokens": 41565101.0, "reward": 1.2347161769866943, "reward_std": 0.37129467725753784, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7425335645675659, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9178602695465088, "step": 187 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6633675615489298, "calib/avg_num_step_conf": 4.82421875, "calib/ece": 0.12078431372549013, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.17320920450327848, "calib/mean_conf": 0.6125490196078431, "calib/mu_c": 0.6927007299270073, "calib/mu_w": 0.5194915254237288, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09803921568627445, "calib/std_conf": 0.25575603523252854, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2524.0, "completions/max_terminated_length": 2524.0, "completions/mean_length": 778.62890625, "completions/mean_terminated_length": 778.62890625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.20053333333333334, "grad_norm": 0.0075156791135668755, "learning_rate": 3.3333333333333335e-07, "loss": -0.0214, "num_tokens": 41818070.0, "reward": 1.2528278827667236, "reward_std": 0.3762345016002655, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7620312571525574, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9497491717338562, "step": 188 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6701410367437832, "calib/avg_num_step_conf": 4.80859375, "calib/ece": 0.10498039215686268, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1769355437337622, "calib/mean_conf": 0.610313725490196, "calib/mu_c": 0.6921897810218977, "calib/mu_w": 0.5152542372881355, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0890196078431372, "calib/std_conf": 0.25633751419595213, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2004.0, "completions/max_terminated_length": 2004.0, "completions/mean_length": 718.81640625, "completions/mean_terminated_length": 721.6353149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.2016, "grad_norm": 0.007998441345989704, "learning_rate": 3.055555555555556e-07, "loss": -0.0061, "num_tokens": 42059423.0, "reward": 1.2445268630981445, "reward_std": 0.31320852041244507, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7653167247772217, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9084115028381348, "step": 189 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.708544344890274, "calib/avg_num_step_conf": 4.60546875, "calib/ece": 0.09110671936758888, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.21002791845214897, "calib/mean_conf": 0.6199604743083004, "calib/mu_c": 0.7046357615894039, "calib/mu_w": 0.4946078431372549, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0571146245059288, "calib/std_conf": 0.2519622564490044, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2804.0, "completions/max_terminated_length": 2804.0, "completions/mean_length": 747.3828125, "completions/mean_terminated_length": 750.3137817382812, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.20266666666666666, "grad_norm": 0.006241267081350088, "learning_rate": 2.7777777777777776e-07, "loss": 0.0004, "num_tokens": 42305929.0, "reward": 1.312222957611084, "reward_std": 0.37045514583587646, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7818261384963989, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9180518388748169, "step": 190 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6680723276467958, "calib/avg_num_step_conf": 4.5625, "calib/ece": 0.2027777777777777, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.16191297680659356, "calib/mean_conf": 0.6337301587301588, "calib/mu_c": 0.7243243243243241, "calib/mu_w": 0.5624113475177306, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.19801587301587295, "calib/std_conf": 0.24400180238400182, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2670.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 688.05859375, "completions/mean_terminated_length": 690.7568969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.20373333333333332, "grad_norm": 0.006758649367839098, "learning_rate": 2.5000000000000004e-07, "loss": 0.0131, "num_tokens": 42535808.0, "reward": 1.123604416847229, "reward_std": 0.31110677123069763, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7249609231948853, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9163706302642822, "step": 191 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.725224947614939, "calib/avg_num_step_conf": 4.5390625, "calib/ece": 0.12352941176470576, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.2244545790706272, "calib/mean_conf": 0.5941176470588235, "calib/mu_c": 0.7015037593984961, "calib/mu_w": 0.47704918032786886, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0980392156862744, "calib/std_conf": 0.26450973124608373, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2315.0, "completions/max_terminated_length": 2315.0, "completions/mean_length": 753.6171875, "completions/mean_terminated_length": 756.5725708007812, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.2048, "grad_norm": 0.007922283373773098, "learning_rate": 2.2222222222222224e-07, "loss": 0.0244, "num_tokens": 42783278.0, "reward": 1.24484121799469, "reward_std": 0.3552216589450836, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7841796875, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9344432353973389, "step": 192 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7034450063211125, "calib/avg_num_step_conf": 4.72265625, "calib/ece": 0.09841897233201571, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.2025347661188368, "calib/mean_conf": 0.6359683794466402, "calib/mu_c": 0.7264285714285713, "calib/mu_w": 0.5238938053097345, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09051383399209477, "calib/std_conf": 0.24367645031655355, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2646.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 739.2421875, "completions/mean_terminated_length": 745.06298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 248.0, "epoch": 0.20586666666666667, "grad_norm": 0.0071614328771829605, "learning_rate": 1.9444444444444447e-07, "loss": 0.0165, "num_tokens": 43027804.0, "reward": 1.2722474336624146, "reward_std": 0.4499852657318115, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.777539074420929, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.935474157333374, "step": 193 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6926573426573426, "calib/avg_num_step_conf": 4.64453125, "calib/ece": 0.11832669322709152, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.20513032422123328, "calib/mean_conf": 0.598804780876494, "calib/mu_c": 0.6976923076923076, "calib/mu_w": 0.4925619834710743, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0996015936254979, "calib/std_conf": 0.2677160402817791, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2681.0, "completions/max_terminated_length": 2681.0, "completions/mean_length": 711.04296875, "completions/mean_terminated_length": 713.8314208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.20693333333333333, "grad_norm": 0.00920792669057846, "learning_rate": 1.6666666666666668e-07, "loss": 0.0156, "num_tokens": 43265343.0, "reward": 1.2192623615264893, "reward_std": 0.3363499343395233, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7594140768051147, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9191587567329407, "step": 194 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7417876476377954, "calib/avg_num_step_conf": 4.75, "calib/ece": 0.10098039215686269, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.26249692421259857, "calib/mean_conf": 0.582156862745098, "calib/mu_c": 0.7128906250000001, "calib/mu_w": 0.45039370078740154, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09058823529411761, "calib/std_conf": 0.2785206981817471, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 729.6875, "completions/mean_terminated_length": 732.549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.208, "grad_norm": 0.007542886771261692, "learning_rate": 1.3888888888888888e-07, "loss": 0.0164, "num_tokens": 43507695.0, "reward": 1.214213490486145, "reward_std": 0.4074704647064209, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7863964438438416, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8887485265731812, "step": 195 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6632183908045977, "calib/avg_num_step_conf": 4.80078125, "calib/ece": 0.12578124999999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.16442373407890643, "calib/mean_conf": 0.6625000000000001, "calib/mu_c": 0.7337931034482758, "calib/mu_w": 0.5693693693693693, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11093749999999997, "calib/std_conf": 0.23584952830141512, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 625.0390625, "completions/mean_terminated_length": 627.490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 258.0, "epoch": 0.20906666666666668, "grad_norm": 0.008680022321641445, "learning_rate": 1.1111111111111112e-07, "loss": 0.0257, "num_tokens": 43719817.0, "reward": 1.2825042009353638, "reward_std": 0.3366711139678955, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7665624618530273, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9328292012214661, "step": 196 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6545175822800651, "calib/avg_num_step_conf": 4.5078125, "calib/ece": 0.16739130434782606, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.15780252784382431, "calib/mean_conf": 0.6207509881422926, "calib/mu_c": 0.7024590163934427, "calib/mu_w": 0.5446564885496183, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.15296442687747033, "calib/std_conf": 0.2550947844631254, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 757.12109375, "completions/mean_terminated_length": 763.0827026367188, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.21013333333333334, "grad_norm": 0.009027253836393356, "learning_rate": 8.333333333333334e-08, "loss": -0.0007, "num_tokens": 43968264.0, "reward": 1.1785598993301392, "reward_std": 0.40935373306274414, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.7361230850219727, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9404309988021851, "step": 197 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7265509144834403, "calib/avg_num_step_conf": 4.6484375, "calib/ece": 0.08509803921568618, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.22741596638655465, "calib/mean_conf": 0.6019607843137255, "calib/mu_c": 0.7080882352941177, "calib/mu_w": 0.48067226890756304, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07686274509803911, "calib/std_conf": 0.258419173147747, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1612.0, "completions/max_terminated_length": 1612.0, "completions/mean_length": 662.78125, "completions/mean_terminated_length": 665.3804321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 254.0, "epoch": 0.2112, "grad_norm": 0.009608845226466656, "learning_rate": 5.555555555555556e-08, "loss": 0.0032, "num_tokens": 44192888.0, "reward": 1.2467832565307617, "reward_std": 0.3366864025592804, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7822265625, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9023672342300415, "step": 198 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6384757182812103, "calib/avg_num_step_conf": 4.44921875, "calib/ece": 0.16765873015873012, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.15722730739893198, "calib/mean_conf": 0.6632936507936507, "calib/mu_c": 0.7344202898550724, "calib/mu_w": 0.5771929824561404, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.14166666666666664, "calib/std_conf": 0.24578160286907955, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2857.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 748.7265625, "completions/mean_terminated_length": 754.6220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.21226666666666666, "grad_norm": 0.006638046354055405, "learning_rate": 2.777777777777778e-08, "loss": 0.0165, "num_tokens": 44438330.0, "reward": 1.2455155849456787, "reward_std": 0.44035571813583374, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7445605397224426, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9273160696029663, "step": 199 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6689035916824196, "calib/avg_num_step_conf": 4.73046875, "calib/ece": 0.12015810276679831, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.1785507246376813, "calib/mean_conf": 0.6434782608695653, "calib/mu_c": 0.7246376811594203, "calib/mu_w": 0.546086956521739, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.10909090909090899, "calib/std_conf": 0.24785035592979254, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2875.0, "completions/max_terminated_length": 2875.0, "completions/mean_length": 714.94140625, "completions/mean_terminated_length": 720.5708618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.21333333333333335, "grad_norm": 0.0071783047169446945, "learning_rate": 0.0, "loss": -0.0405, "num_tokens": 44678971.0, "reward": 1.2458322048187256, "reward_std": 0.3161921799182892, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7566796541213989, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9043446779251099, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.016906058083986864, "train_runtime": 10412.3351, "train_samples_per_second": 4.917, "train_steps_per_second": 0.019 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44678971, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }