{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.01171875, "calib/ece": 0.6500000000000001, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.025000000000000022, "calib/mean_conf": 0.9833333333333334, "calib/mu_c": 1.0, "calib/mu_w": 0.975, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.6500000000000001, "calib/std_conf": 0.023570226039551608, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 695.9765625, "completions/mean_terminated_length": 748.6134643554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.0008487591985613108, "learning_rate": 2.5000000000000004e-07, "loss": 0.0006, "num_tokens": 235322.0, "reward": 0.0074518583714962006, "reward_std": 0.02107703872025013, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0016824332997202873, "step": 1 }, { "calib/answer_extract_rate": 0.05078125, "calib/avg_num_step_conf": 0.04296875, "calib/ece": 1.0, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": NaN, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 1.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 644.19140625, "completions/mean_terminated_length": 717.0130004882812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.000985708087682724, "learning_rate": 5.000000000000001e-07, "loss": 0.0083, "num_tokens": 453091.0, "reward": 0.002142443088814616, "reward_std": 0.006059744395315647, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0038822719361633062, "step": 2 }, { "calib/answer_extract_rate": 0.05078125, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.23333333333333334, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.23333333333333336, "calib/mu_c": NaN, "calib/mu_w": 0.23333333333333336, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.23333333333333334, "calib/std_conf": 0.3090127649287144, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 724.421875, "completions/mean_terminated_length": 785.8135375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.0, "learning_rate": 7.5e-07, "loss": 0.0, "num_tokens": 693367.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 3 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.078125, "calib/ece": 0.3749750000000001, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.24995, "calib/mean_conf": 0.8749750000000001, "calib/mu_c": 0.99995, "calib/mu_w": 0.75, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.3749750000000001, "calib/std_conf": 0.2164919210386383, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 741.92578125, "completions/mean_terminated_length": 815.1630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.0011622401652857661, "learning_rate": 1.0000000000000002e-06, "loss": 0.0023, "num_tokens": 939036.0, "reward": 0.0078125, "reward_std": 0.022097086533904076, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 4 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.375, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.1499999999999999, "calib/mean_conf": 0.875, "calib/mu_c": 0.95, "calib/mu_w": 0.8, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.375, "calib/std_conf": 0.16393596310755001, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 3061.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 702.37890625, "completions/mean_terminated_length": 806.3184204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.0011576212709769607, "learning_rate": 1.25e-06, "loss": 0.0156, "num_tokens": 1175101.0, "reward": 0.0078125, "reward_std": 0.022097086533904076, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 5 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.0703125, "calib/ece": 0.84, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.84, "calib/mu_c": NaN, "calib/mu_w": 0.84, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.84, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 685.578125, "completions/mean_terminated_length": 728.2490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0064, "grad_norm": 0.0015471765073016286, "learning_rate": 1.5e-06, "loss": -0.0042, "num_tokens": 1406129.0, "reward": 0.002580254338681698, "reward_std": 0.00729806162416935, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0011500000255182385, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.003333517350256443, "step": 6 }, { "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.8333333333333334, "calib/avg_num_step_conf": 0.1484375, "calib/ece": 0.34759259259259256, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.5365432098765432, "calib/mean_conf": 0.5975925925925926, "calib/mu_c": 1.0, "calib/mu_w": 0.46345679012345675, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.34759259259259256, "calib/std_conf": 0.42104233289603427, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 725.26953125, "completions/mean_terminated_length": 836.3468627929688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.0017169080674648285, "learning_rate": 1.75e-06, "loss": 0.0288, "num_tokens": 1648790.0, "reward": 0.015072671696543694, "reward_std": 0.035592082887887955, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.003904687473550439, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0009188127587549388, "step": 7 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.05859375, "calib/ece": 0.09999999999999998, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/mu_c": 0.9, "calib/mu_w": NaN, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 706.6875, "completions/mean_terminated_length": 796.9691162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.0014117079554125667, "learning_rate": 2.0000000000000003e-06, "loss": -0.0038, "num_tokens": 1885782.0, "reward": 0.00773979164659977, "reward_std": 0.021891437470912933, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.003867187537252903, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.002912291558459401, "step": 8 }, { "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.95, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.5, "calib/mean_conf": 0.95, "calib/mu_c": NaN, "calib/mu_w": 0.95, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.95, "calib/std_conf": 0.04999999999999999, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 658.0390625, "completions/mean_terminated_length": 726.112060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.0, "learning_rate": 2.25e-06, "loss": 0.0, "num_tokens": 2111344.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 9 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.03125, "calib/ece": 0.3436666666666667, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.48450000000000004, "calib/mean_conf": 0.677, "calib/mu_c": 1.0, "calib/mu_w": 0.5155, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.3436666666666667, "calib/std_conf": 0.4567909806465097, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 707.08203125, "completions/mean_terminated_length": 776.8798217773438, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.010666666666666666, "grad_norm": 0.0006159533513709903, "learning_rate": 2.5e-06, "loss": 0.0054, "num_tokens": 2348725.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 10 }, { "calib/answer_extract_rate": 0.04296875, "calib/avg_num_step_conf": 0.0078125, "calib/ece": 0.515, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.485, "calib/mu_c": 0.485, "calib/mu_w": NaN, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.0, "calib/std_conf": 0.385, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2792.0, "completions/max_terminated_length": 2792.0, "completions/mean_length": 680.48828125, "completions/mean_terminated_length": 757.4130249023438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.0017216140404343605, "learning_rate": 2.7500000000000004e-06, "loss": -0.0, "num_tokens": 2576978.0, "reward": 0.015625, "reward_std": 0.036563027650117874, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 11 }, { "calib/answer_extract_rate": 0.046875, "calib/auroc": 0.3125, "calib/avg_num_step_conf": 0.07421875, "calib/ece": 0.7233333333333334, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.21999999999999997, "calib/mean_conf": 0.8766666666666666, "calib/mu_c": 0.73, "calib/mu_w": 0.95, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.6333333333333333, "calib/std_conf": 0.20013884069704097, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 713.0390625, "completions/mean_terminated_length": 770.2025146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.002639840357005596, "learning_rate": 3e-06, "loss": -0.0007, "num_tokens": 2813260.0, "reward": 0.021190494298934937, "reward_std": 0.05565603822469711, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.008079687133431435, "rewards/format_reward_step": 0.01953125, "rewards/stepwise_brier_reward": 0.013915101066231728, "step": 12 }, { "calib/answer_extract_rate": 0.05078125, "calib/auroc": 0.4375, "calib/avg_num_step_conf": 0.0625, "calib/ece": 0.595, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.01749999999999996, "calib/mean_conf": 0.7616666666666667, "calib/mu_c": 0.75, "calib/mu_w": 0.7675, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.5116666666666667, "calib/std_conf": 0.35918503433312593, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 714.85546875, "completions/mean_terminated_length": 778.7361450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.0011884482810273767, "learning_rate": 3.2500000000000002e-06, "loss": 0.0052, "num_tokens": 3050423.0, "reward": 0.01173363346606493, "reward_std": 0.03318772464990616, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0031845346093177795, "step": 13 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.38888888888888884, "calib/avg_num_step_conf": 0.07421875, "calib/ece": 0.7185185185185186, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": -0.188888888888889, "calib/mean_conf": 0.9037037037037037, "calib/mu_c": 0.7777777777777777, "calib/mu_w": 0.9666666666666667, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.6444444444444445, "calib/std_conf": 0.21107861998185198, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2902.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 740.328125, "completions/mean_terminated_length": 824.017333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.0018966062925755978, "learning_rate": 3.5e-06, "loss": -0.0005, "num_tokens": 3294915.0, "reward": 0.01171875, "reward_std": 0.03314562886953354, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 14 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.6666666666666667, "calib/avg_num_step_conf": 0.01953125, "calib/ece": 0.6875, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.08333333333333337, "calib/mean_conf": 0.9375, "calib/mu_c": 1.0, "calib/mu_w": 0.9166666666666666, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.6875, "calib/std_conf": 0.10825317547305482, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 678.6796875, "completions/mean_terminated_length": 752.1298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.016, "grad_norm": 0.0028262899722903967, "learning_rate": 3.7500000000000005e-06, "loss": 0.0127, "num_tokens": 3526105.0, "reward": 0.007777903228998184, "reward_std": 0.021999232470989227, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.001708984375, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.0026936442591249943, "step": 15 }, { "calib/answer_extract_rate": 0.05078125, "calib/auroc": 0.16666666666666666, "calib/avg_num_step_conf": 0.01953125, "calib/ece": 0.48, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": -0.20000000000000007, "calib/mean_conf": 0.8800000000000001, "calib/mu_c": 0.7999999999999999, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.38, "calib/std_conf": 0.19390719429665315, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 779.78515625, "completions/mean_terminated_length": 853.0983276367188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.002077557845041156, "learning_rate": 4.000000000000001e-06, "loss": -0.0057, "num_tokens": 3784146.0, "reward": 0.015214828774333, "reward_std": 0.043034035712480545, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.0029296875, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0034374422393739223, "step": 16 }, { "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.03125, "calib/ece": 0.6283333333333334, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.020000000000000018, "calib/mean_conf": 0.9116666666666666, "calib/mu_c": 0.925, "calib/mu_w": 0.905, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.6033333333333334, "calib/std_conf": 0.12047360245667466, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 717.52734375, "completions/mean_terminated_length": 775.0505981445312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.002145631704479456, "learning_rate": 4.25e-06, "loss": 0.0017, "num_tokens": 4020929.0, "reward": 0.011951509863138199, "reward_std": 0.026172826066613197, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.002152734436094761, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.002875569509342313, "step": 17 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.6666666666666667, "calib/avg_num_step_conf": 0.01953125, "calib/ece": 0.5182293672839506, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.30902751028806585, "calib/mean_conf": 0.7682293672839506, "calib/mu_c": 1.0, "calib/mu_w": 0.6909724897119341, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.5182293672839506, "calib/std_conf": 0.401438511566583, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2947.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 765.953125, "completions/mean_terminated_length": 834.3999633789062, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0192, "grad_norm": 0.001130632241256535, "learning_rate": 4.5e-06, "loss": 0.0129, "num_tokens": 4277301.0, "reward": 0.007290839217603207, "reward_std": 0.020621608942747116, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0038854805752635, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0010798965813592076, "step": 18 }, { "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.648899549171724, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.6363636363636364, "calib/gap": 0.2862104959111036, "calib/mean_conf": 0.7398086400808149, "calib/mu_c": 1.0, "calib/mu_w": 0.7137895040888964, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.648899549171724, "calib/std_conf": 0.3887335985193895, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 654.9296875, "completions/mean_terminated_length": 722.6810302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.020266666666666665, "grad_norm": 0.00234220246784389, "learning_rate": 4.75e-06, "loss": 0.0088, "num_tokens": 4499291.0, "reward": 0.009577165357768536, "reward_std": 0.027088314294815063, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.004096976947039366, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.005114707630127668, "step": 19 }, { "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.7333333333333334, "calib/avg_num_step_conf": 0.078125, "calib/ece": 0.3563636363636363, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.6363636363636364, "calib/gap": 0.21400000000000008, "calib/mean_conf": 0.8127272727272729, "calib/mu_c": 0.91, "calib/mu_w": 0.696, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.3118181818181818, "calib/std_conf": 0.30558709245382104, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 636.19140625, "completions/mean_terminated_length": 705.0432739257812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.003776842262595892, "learning_rate": 5e-06, "loss": 0.0404, "num_tokens": 4716596.0, "reward": 0.05176505446434021, "reward_std": 0.1317472606897354, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.01792929694056511, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.018076637759804726, "step": 20 }, { "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.65, "calib/avg_num_step_conf": 0.14453125, "calib/ece": 0.5698058823529412, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.6470588235294118, "calib/gap": 0.12194166666666661, "calib/mean_conf": 0.8639235294117648, "calib/mu_c": 0.95, "calib/mu_w": 0.8280583333333333, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.5698058823529412, "calib/std_conf": 0.21797371544027358, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2776.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 708.9921875, "completions/mean_terminated_length": 762.6134643554688, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.0224, "grad_norm": 0.0033623469062149525, "learning_rate": 4.9722222222222224e-06, "loss": -0.0056, "num_tokens": 4950626.0, "reward": 0.036954864859580994, "reward_std": 0.10452413558959961, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.011474609375, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.01862023025751114, "step": 21 }, { "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.52, "calib/avg_num_step_conf": 0.125, "calib/ece": 0.5760666666666666, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": 0.0049000000000001265, "calib/mean_conf": 0.8427333333333333, "calib/mu_c": 0.8460000000000001, "calib/mu_w": 0.8411, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.5427333333333333, "calib/std_conf": 0.2662591386016429, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2857.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 667.05078125, "completions/mean_terminated_length": 736.0560302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.023466666666666667, "grad_norm": 0.0026941606774926186, "learning_rate": 4.944444444444445e-06, "loss": 0.0104, "num_tokens": 5172775.0, "reward": 0.04088283330202103, "reward_std": 0.10474004596471786, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.012176171876490116, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.01730399578809738, "step": 22 }, { "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.696078431372549, "calib/avg_num_step_conf": 0.16796875, "calib/ece": 0.5299442586399108, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.6086956521739131, "calib/gap": 0.2491930618401207, "calib/mean_conf": 0.790813823857302, "calib/mu_c": 0.975, "calib/mu_w": 0.7258069381598793, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.5299442586399108, "calib/std_conf": 0.3417305783731682, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 682.4453125, "completions/mean_terminated_length": 740.2796630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.024533333333333334, "grad_norm": 0.002465737285092473, "learning_rate": 4.9166666666666665e-06, "loss": 0.0166, "num_tokens": 5400985.0, "reward": 0.05742061138153076, "reward_std": 0.1423000991344452, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02013828232884407, "rewards/format_reward_step": 0.03515625, "rewards/stepwise_brier_reward": 0.022218381986021996, "step": 23 }, { "calib/answer_extract_rate": 0.1328125, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.1484375, "calib/ece": 0.46157368421052625, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.47368421052631576, "calib/gap": 0.10405595238095233, "calib/mean_conf": 0.8299947368421051, "calib/mu_c": 0.8957142857142857, "calib/mu_w": 0.7916583333333334, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.1484375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.46157368421052625, "calib/std_conf": 0.23670612575997937, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 681.9296875, "completions/mean_terminated_length": 712.5469360351562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0256, "grad_norm": 0.0032444254029542208, "learning_rate": 4.888888888888889e-06, "loss": -0.0057, "num_tokens": 5629639.0, "reward": 0.05443684384226799, "reward_std": 0.13633695244789124, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.02127421647310257, "rewards/format_reward_step": 0.03515625, "rewards/stepwise_brier_reward": 0.02363644167780876, "step": 24 }, { "calib/answer_extract_rate": 0.20703125, "calib/auroc": 0.6169950738916257, "calib/avg_num_step_conf": 0.3125, "calib/ece": 0.4677523200170542, "calib/final_conf_rate": 0.16796875, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.46511627906976744, "calib/gap": 0.14476133829983573, "calib/mean_conf": 0.6937988316449611, "calib/mu_c": 0.7914285714285713, "calib/mu_w": 0.6466672331287355, "calib/nonempty_final_conf_rate": 0.16796875, "calib/nonempty_reasoning_rate": 0.25, "calib/nonempty_step_conf_rate": 0.14453125, "calib/pce": 0.4179848781565891, "calib/std_conf": 0.38119765456314086, "calib/step_conf_rate": 0.14453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2873.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 641.03515625, "completions/mean_terminated_length": 698.3191528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.02666666666666667, "grad_norm": 0.004970417357981205, "learning_rate": 4.861111111111111e-06, "loss": 0.007, "num_tokens": 5846536.0, "reward": 0.12949004769325256, "reward_std": 0.21166762709617615, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.050851866602897644, "rewards/format_reward_step": 0.09375, "rewards/stepwise_brier_reward": 0.06938145309686661, "step": 25 }, { "calib/answer_extract_rate": 0.3046875, "calib/auroc": 0.6566666666666666, "calib/avg_num_step_conf": 0.625, "calib/ece": 0.608535467579016, "calib/final_conf_rate": 0.2421875, "calib/format_rate": 0.1328125, "calib/frac_conf_gt_0.9": 0.532258064516129, "calib/gap": 0.13554531313131324, "calib/mean_conf": 0.774022596937113, "calib/mu_c": 0.8833333333333334, "calib/mu_w": 0.7477880202020202, "calib/nonempty_final_conf_rate": 0.2421875, "calib/nonempty_reasoning_rate": 0.36328125, "calib/nonempty_step_conf_rate": 0.19921875, "calib/pce": 0.5945048387096774, "calib/std_conf": 0.31876347751671763, "calib/step_conf_rate": 0.19921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 634.7421875, "completions/mean_terminated_length": 674.2490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.027733333333333332, "grad_norm": 0.005587348248809576, "learning_rate": 4.833333333333333e-06, "loss": 0.0142, "num_tokens": 6063838.0, "reward": 0.14023250341415405, "reward_std": 0.27342501282691956, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.05323883146047592, "rewards/format_reward_step": 0.1328125, "rewards/stepwise_brier_reward": 0.0919523760676384, "step": 26 }, { "calib/answer_extract_rate": 0.2890625, "calib/auroc": 0.6232558139534884, "calib/avg_num_step_conf": 0.9453125, "calib/ece": 0.7037980817610064, "calib/final_conf_rate": 0.20703125, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.7358490566037735, "calib/gap": 0.06330410852713164, "calib/mean_conf": 0.8585150628930819, "calib/mu_c": 0.9098749999999999, "calib/mu_w": 0.8465708914728682, "calib/nonempty_final_conf_rate": 0.20703125, "calib/nonempty_reasoning_rate": 0.3359375, "calib/nonempty_step_conf_rate": 0.1953125, "calib/pce": 0.6868169496855348, "calib/std_conf": 0.2723051982083245, "calib/step_conf_rate": 0.1953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 632.15234375, "completions/mean_terminated_length": 674.2958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.0288, "grad_norm": 0.005046192090958357, "learning_rate": 4.805555555555556e-06, "loss": 0.0392, "num_tokens": 6280453.0, "reward": 0.1070917472243309, "reward_std": 0.2380264401435852, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.023514632135629654, "rewards/format_reward_step": 0.1015625, "rewards/stepwise_brier_reward": 0.056337736546993256, "step": 27 }, { "calib/answer_extract_rate": 0.3515625, "calib/auroc": 0.5287569573283859, "calib/avg_num_step_conf": 0.625, "calib/ece": 0.543011719700939, "calib/final_conf_rate": 0.27734375, "calib/format_rate": 0.16796875, "calib/frac_conf_gt_0.9": 0.5492957746478874, "calib/gap": 0.0214253807919913, "calib/mean_conf": 0.7671150408624413, "calib/mu_c": 0.7819015712681818, "calib/mu_w": 0.7604761904761905, "calib/nonempty_final_conf_rate": 0.27734375, "calib/nonempty_reasoning_rate": 0.41796875, "calib/nonempty_step_conf_rate": 0.24609375, "calib/pce": 0.5001338028169013, "calib/std_conf": 0.3338678281776153, "calib/step_conf_rate": 0.24609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 663.3046875, "completions/mean_terminated_length": 690.2682495117188, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.029866666666666666, "grad_norm": 0.004129378125071526, "learning_rate": 4.777777777777778e-06, "loss": 0.0671, "num_tokens": 6506771.0, "reward": 0.21467575430870056, "reward_std": 0.39510178565979004, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.08527621626853943, "rewards/format_reward_step": 0.16796875, "rewards/stepwise_brier_reward": 0.11158812046051025, "step": 28 }, { "calib/answer_extract_rate": 0.41015625, "calib/auroc": 0.5290178571428572, "calib/avg_num_step_conf": 0.8671875, "calib/ece": 0.6497173372093024, "calib/final_conf_rate": 0.3359375, "calib/format_rate": 0.1796875, "calib/frac_conf_gt_0.9": 0.6395348837209303, "calib/gap": 0.1153294142857143, "calib/mean_conf": 0.8192522209302324, "calib/mu_c": 0.913125, "calib/mu_w": 0.7977955857142857, "calib/nonempty_final_conf_rate": 0.3359375, "calib/nonempty_reasoning_rate": 0.484375, "calib/nonempty_step_conf_rate": 0.26953125, "calib/pce": 0.641461523255814, "calib/std_conf": 0.30291014471554223, "calib/step_conf_rate": 0.26953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 624.68359375, "completions/mean_terminated_length": 647.4453735351562, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.030933333333333334, "grad_norm": 0.005222437437623739, "learning_rate": 4.75e-06, "loss": -0.0177, "num_tokens": 6723386.0, "reward": 0.2006719410419464, "reward_std": 0.3653067946434021, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.08126065135002136, "rewards/format_reward_step": 0.1796875, "rewards/stepwise_brier_reward": 0.12766645848751068, "step": 29 }, { "calib/answer_extract_rate": 0.44921875, "calib/auroc": 0.5370705244122966, "calib/avg_num_step_conf": 1.7109375, "calib/ece": 0.621134, "calib/final_conf_rate": 0.390625, "calib/format_rate": 0.234375, "calib/frac_conf_gt_0.9": 0.56, "calib/gap": 0.06032754671488849, "calib/mean_conf": 0.794246, "calib/mu_c": 0.8419047619047619, "calib/mu_w": 0.7815772151898734, "calib/nonempty_final_conf_rate": 0.390625, "calib/nonempty_reasoning_rate": 0.55078125, "calib/nonempty_step_conf_rate": 0.36328125, "calib/pce": 0.60269, "calib/std_conf": 0.3094254919750472, "calib/step_conf_rate": 0.36328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 660.0, "completions/mean_terminated_length": 681.290283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.032, "grad_norm": 0.005847262218594551, "learning_rate": 4.722222222222222e-06, "loss": 0.0274, "num_tokens": 6948898.0, "reward": 0.24672269821166992, "reward_std": 0.4171825647354126, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.09664709866046906, "rewards/format_reward_step": 0.234375, "rewards/stepwise_brier_reward": 0.13734658062458038, "step": 30 }, { "calib/answer_extract_rate": 0.5546875, "calib/auroc": 0.5846328784925277, "calib/avg_num_step_conf": 1.4921875, "calib/ece": 0.5767189827800069, "calib/final_conf_rate": 0.55078125, "calib/format_rate": 0.3203125, "calib/frac_conf_gt_0.9": 0.475177304964539, "calib/gap": 0.11232585965396824, "calib/mean_conf": 0.7452722364027018, "calib/mu_c": 0.8360888888888889, "calib/mu_w": 0.7237630292349206, "calib/nonempty_final_conf_rate": 0.55078125, "calib/nonempty_reasoning_rate": 0.640625, "calib/nonempty_step_conf_rate": 0.4296875, "calib/pce": 0.5652509287402906, "calib/std_conf": 0.3307988490229472, "calib/step_conf_rate": 0.4296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 638.125, "completions/mean_terminated_length": 648.2540283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.03306666666666667, "grad_norm": 0.004652371630072594, "learning_rate": 4.694444444444445e-06, "loss": 0.0786, "num_tokens": 7167738.0, "reward": 0.32629090547561646, "reward_std": 0.43779438734054565, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.14228935539722443, "rewards/format_reward_step": 0.3203125, "rewards/stepwise_brier_reward": 0.1987099051475525, "step": 31 }, { "calib/answer_extract_rate": 0.5859375, "calib/auroc": 0.5058571428571428, "calib/avg_num_step_conf": 1.4765625, "calib/ece": 0.5549560987654322, "calib/final_conf_rate": 0.52734375, "calib/format_rate": 0.328125, "calib/frac_conf_gt_0.9": 0.4666666666666667, "calib/gap": -0.020848761904761925, "calib/mean_conf": 0.7469080987654321, "calib/mu_c": 0.7314645714285715, "calib/mu_w": 0.7523133333333334, "calib/nonempty_final_conf_rate": 0.52734375, "calib/nonempty_reasoning_rate": 0.67578125, "calib/nonempty_step_conf_rate": 0.4609375, "calib/pce": 0.5213024691358026, "calib/std_conf": 0.32492967651162474, "calib/step_conf_rate": 0.4609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 560.6875, "completions/mean_terminated_length": 569.5873413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 11.0, "epoch": 0.034133333333333335, "grad_norm": 0.005850342568010092, "learning_rate": 4.666666666666667e-06, "loss": 0.0303, "num_tokens": 7367546.0, "reward": 0.38101819157600403, "reward_std": 0.45329520106315613, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.1564299762248993, "rewards/format_reward_step": 0.328125, "rewards/stepwise_brier_reward": 0.19246289134025574, "step": 32 }, { "calib/answer_extract_rate": 0.69921875, "calib/auroc": 0.550956156716418, "calib/avg_num_step_conf": 1.8984375, "calib/ece": 0.6287451464439183, "calib/final_conf_rate": 0.6484375, "calib/format_rate": 0.45703125, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.0035832961963400134, "calib/mean_conf": 0.7702324596969303, "calib/mu_c": 0.773125, "calib/mu_w": 0.7695417038036599, "calib/nonempty_final_conf_rate": 0.6484375, "calib/nonempty_reasoning_rate": 0.76171875, "calib/nonempty_step_conf_rate": 0.55859375, "calib/pce": 0.6031032609017496, "calib/std_conf": 0.3125446232054246, "calib/step_conf_rate": 0.55859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2298.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 536.46484375, "completions/mean_terminated_length": 544.9801635742188, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.0352, "grad_norm": 0.005888388957828283, "learning_rate": 4.638888888888889e-06, "loss": 0.0636, "num_tokens": 7561321.0, "reward": 0.4416656196117401, "reward_std": 0.5061711668968201, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.19741714000701904, "rewards/format_reward_step": 0.45703125, "rewards/stepwise_brier_reward": 0.2921407222747803, "step": 33 }, { "calib/answer_extract_rate": 0.76171875, "calib/auroc": 0.5271682340647859, "calib/avg_num_step_conf": 2.63671875, "calib/ece": 0.6528921370344805, "calib/final_conf_rate": 0.7578125, "calib/format_rate": 0.48046875, "calib/frac_conf_gt_0.9": 0.5257731958762887, "calib/gap": 0.048052003561966816, "calib/mean_conf": 0.7936137865190167, "calib/mu_c": 0.8344827586206895, "calib/mu_w": 0.7864307550587227, "calib/nonempty_final_conf_rate": 0.7578125, "calib/nonempty_reasoning_rate": 0.8828125, "calib/nonempty_step_conf_rate": 0.6484375, "calib/pce": 0.6485106937355114, "calib/std_conf": 0.2976252390781631, "calib/step_conf_rate": 0.6484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2677.0, "completions/max_terminated_length": 2677.0, "completions/mean_length": 488.46875, "completions/mean_terminated_length": 490.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.03626666666666667, "grad_norm": 0.006886497605592012, "learning_rate": 4.611111111111112e-06, "loss": -0.0012, "num_tokens": 7741049.0, "reward": 0.44409021735191345, "reward_std": 0.523621678352356, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.19314272701740265, "rewards/format_reward_step": 0.48046875, "rewards/stepwise_brier_reward": 0.329137921333313, "step": 34 }, { "calib/answer_extract_rate": 0.7578125, "calib/auroc": 0.5019651401230348, "calib/avg_num_step_conf": 3.12890625, "calib/ece": 0.5560118690291648, "calib/final_conf_rate": 0.75, "calib/format_rate": 0.48046875, "calib/frac_conf_gt_0.9": 0.40625, "calib/gap": 0.0016982368679209214, "calib/mean_conf": 0.712848398827978, "calib/mu_c": 0.7142105263157895, "calib/mu_w": 0.7125122894478686, "calib/nonempty_final_conf_rate": 0.75, "calib/nonempty_reasoning_rate": 0.8359375, "calib/nonempty_step_conf_rate": 0.60546875, "calib/pce": 0.535471800595238, "calib/std_conf": 0.3321984379710554, "calib/step_conf_rate": 0.60546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2629.0, "completions/max_terminated_length": 2629.0, "completions/mean_length": 545.8203125, "completions/mean_terminated_length": 550.1181030273438, "completions/min_length": 0.0, "completions/min_terminated_length": 23.0, "epoch": 0.037333333333333336, "grad_norm": 0.005417841020971537, "learning_rate": 4.583333333333333e-06, "loss": 0.0193, "num_tokens": 7939603.0, "reward": 0.48744523525238037, "reward_std": 0.5235015153884888, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.21776510775089264, "rewards/format_reward_step": 0.48046875, "rewards/stepwise_brier_reward": 0.32831326127052307, "step": 35 }, { "calib/answer_extract_rate": 0.8203125, "calib/auroc": 0.4933014354066986, "calib/avg_num_step_conf": 2.86328125, "calib/ece": 0.5209496376811605, "calib/final_conf_rate": 0.80859375, "calib/format_rate": 0.6171875, "calib/frac_conf_gt_0.9": 0.3961352657004831, "calib/gap": -0.03546680622009957, "calib/mean_conf": 0.6958955314009672, "calib/mu_c": 0.669852272727271, "calib/mu_w": 0.7053190789473706, "calib/nonempty_final_conf_rate": 0.80859375, "calib/nonempty_reasoning_rate": 0.90234375, "calib/nonempty_step_conf_rate": 0.71875, "calib/pce": 0.47557234299517026, "calib/std_conf": 0.3317535883627056, "calib/step_conf_rate": 0.71875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1874.0, "completions/max_terminated_length": 1874.0, "completions/mean_length": 446.47265625, "completions/mean_terminated_length": 448.2235412597656, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.0384, "grad_norm": 0.006148908287286758, "learning_rate": 4.555555555555556e-06, "loss": 0.0568, "num_tokens": 8106180.0, "reward": 0.6918026208877563, "reward_std": 0.6030615568161011, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.32322216033935547, "rewards/format_reward_step": 0.6171875, "rewards/stepwise_brier_reward": 0.45826616883277893, "step": 36 }, { "calib/answer_extract_rate": 0.81640625, "calib/auroc": 0.5715029761904762, "calib/avg_num_step_conf": 2.6953125, "calib/ece": 0.5662541586057692, "calib/final_conf_rate": 0.8125, "calib/format_rate": 0.57421875, "calib/frac_conf_gt_0.9": 0.40384615384615385, "calib/gap": 0.057586398869047595, "calib/mean_conf": 0.7308629086057692, "calib/mu_c": 0.7773749999999999, "calib/mu_w": 0.7197886011309523, "calib/nonempty_final_conf_rate": 0.8125, "calib/nonempty_reasoning_rate": 0.90234375, "calib/nonempty_step_conf_rate": 0.6875, "calib/pce": 0.5524046874519231, "calib/std_conf": 0.30629556282873216, "calib/step_conf_rate": 0.6875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2216.0, "completions/max_terminated_length": 2216.0, "completions/mean_length": 478.390625, "completions/mean_terminated_length": 480.2666931152344, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.039466666666666664, "grad_norm": 0.0059788888320326805, "learning_rate": 4.527777777777778e-06, "loss": -0.0013, "num_tokens": 8285312.0, "reward": 0.5644609928131104, "reward_std": 0.5739938020706177, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.26444345712661743, "rewards/format_reward_step": 0.57421875, "rewards/stepwise_brier_reward": 0.3992696702480316, "step": 37 }, { "calib/answer_extract_rate": 0.81640625, "calib/auroc": 0.5535502958579882, "calib/avg_num_step_conf": 3.20703125, "calib/ece": 0.4963351124697266, "calib/final_conf_rate": 0.81640625, "calib/format_rate": 0.62890625, "calib/frac_conf_gt_0.9": 0.31100478468899523, "calib/gap": 0.06486359794177798, "calib/mean_conf": 0.6674671794553724, "calib/mu_c": 0.7199166916666666, "calib/mu_w": 0.6550530937248886, "calib/nonempty_final_conf_rate": 0.81640625, "calib/nonempty_reasoning_rate": 0.92578125, "calib/nonempty_step_conf_rate": 0.76171875, "calib/pce": 0.48620736605824333, "calib/std_conf": 0.32891209907867613, "calib/step_conf_rate": 0.76171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2375.0, "completions/max_terminated_length": 2375.0, "completions/mean_length": 479.21875, "completions/mean_terminated_length": 484.9012145996094, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.04053333333333333, "grad_norm": 0.005851101130247116, "learning_rate": 4.5e-06, "loss": 0.0499, "num_tokens": 8464448.0, "reward": 0.6287873983383179, "reward_std": 0.5634995102882385, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.338528573513031, "rewards/format_reward_step": 0.62890625, "rewards/stepwise_brier_reward": 0.45840492844581604, "step": 38 }, { "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.522828947368421, "calib/avg_num_step_conf": 3.29296875, "calib/ece": 0.5117534932515357, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.76171875, "calib/frac_conf_gt_0.9": 0.2826086956521739, "calib/gap": 0.019911174835860357, "calib/mean_conf": 0.6628849715124053, "calib/mu_c": 0.6793333333333333, "calib/mu_w": 0.659422158497473, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.85546875, "calib/pce": 0.5003627106428401, "calib/std_conf": 0.3261157697479711, "calib/step_conf_rate": 0.85546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1507.0, "completions/max_terminated_length": 1507.0, "completions/mean_length": 410.53515625, "completions/mean_terminated_length": 412.1451110839844, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.0416, "grad_norm": 0.006558020133525133, "learning_rate": 4.472222222222223e-06, "loss": 0.0296, "num_tokens": 8625201.0, "reward": 0.73180091381073, "reward_std": 0.47666364908218384, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.391775906085968, "rewards/format_reward_step": 0.76171875, "rewards/stepwise_brier_reward": 0.5577144622802734, "step": 39 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5290841584158416, "calib/avg_num_step_conf": 3.2890625, "calib/ece": 0.5333832492997199, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.8125, "calib/frac_conf_gt_0.9": 0.25630252100840334, "calib/gap": 0.03175771727172716, "calib/mean_conf": 0.6602765266106443, "calib/mu_c": 0.6872305555555555, "calib/mu_w": 0.6554728382838283, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.8671875, "calib/pce": 0.5211996358543418, "calib/std_conf": 0.3150771534789929, "calib/step_conf_rate": 0.8671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1828.0, "completions/max_terminated_length": 1828.0, "completions/mean_length": 442.69921875, "completions/mean_terminated_length": 444.4353332519531, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.042666666666666665, "grad_norm": 0.00583281833678484, "learning_rate": 4.444444444444444e-06, "loss": -0.0175, "num_tokens": 8794860.0, "reward": 0.7500452399253845, "reward_std": 0.5234329700469971, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.4331750273704529, "rewards/format_reward_step": 0.8125, "rewards/stepwise_brier_reward": 0.5807058811187744, "step": 40 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.534021978021978, "calib/avg_num_step_conf": 3.30078125, "calib/ece": 0.44919504960317475, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.2791666666666667, "calib/gap": 0.03207007116692839, "calib/mean_conf": 0.6562501884920634, "calib/mu_c": 0.6796346153846154, "calib/mu_w": 0.647564544217687, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.87890625, "calib/pce": 0.41730595238095247, "calib/std_conf": 0.32355005709777834, "calib/step_conf_rate": 0.87890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 418.36328125, "completions/mean_terminated_length": 420.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.04373333333333333, "grad_norm": 0.006292893085628748, "learning_rate": 4.416666666666667e-06, "loss": 0.0023, "num_tokens": 8958777.0, "reward": 0.8849216103553772, "reward_std": 0.5418712496757507, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.46316659450531006, "rewards/format_reward_step": 0.8203125, "rewards/stepwise_brier_reward": 0.597728431224823, "step": 41 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6087782721542454, "calib/avg_num_step_conf": 3.39453125, "calib/ece": 0.405187424590164, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.24180327868852458, "calib/gap": 0.11658757256210595, "calib/mean_conf": 0.6234431081967213, "calib/mu_c": 0.7123172413793103, "calib/mu_w": 0.5957296688172043, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.3954628073770493, "calib/std_conf": 0.32575273275629457, "calib/step_conf_rate": 0.93359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1634.0, "completions/max_terminated_length": 1634.0, "completions/mean_length": 393.47265625, "completions/mean_terminated_length": 393.47265625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.0448, "grad_norm": 0.006545498501509428, "learning_rate": 4.388888888888889e-06, "loss": -0.0028, "num_tokens": 9113442.0, "reward": 0.9316054582595825, "reward_std": 0.5143445730209351, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.533115029335022, "rewards/format_reward_step": 0.87890625, "rewards/stepwise_brier_reward": 0.6680043935775757, "step": 42 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5251258973534769, "calib/avg_num_step_conf": 3.13671875, "calib/ece": 0.3751279607492878, "calib/final_conf_rate": 0.9140625, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.18376068376068377, "calib/gap": 0.029125472668809604, "calib/mean_conf": 0.566652974994302, "calib/mu_c": 0.589430588235294, "calib/mu_w": 0.5603051155664844, "calib/nonempty_final_conf_rate": 0.9140625, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.3619161088974359, "calib/std_conf": 0.30766516574131414, "calib/step_conf_rate": 0.91015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2194.0, "completions/max_terminated_length": 2194.0, "completions/mean_length": 443.0390625, "completions/mean_terminated_length": 443.0390625, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.04586666666666667, "grad_norm": 0.006089336704462767, "learning_rate": 4.361111111111112e-06, "loss": 0.007, "num_tokens": 9281652.0, "reward": 0.903231143951416, "reward_std": 0.49017244577407837, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.5439672470092773, "rewards/format_reward_step": 0.85546875, "rewards/stepwise_brier_reward": 0.6859275102615356, "step": 43 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5084262380088151, "calib/avg_num_step_conf": 3.11328125, "calib/ece": 0.39130325034578145, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.15767634854771784, "calib/gap": 0.011761451041396698, "calib/mean_conf": 0.5245667358229599, "calib/mu_c": 0.5344736842105263, "calib/mu_w": 0.5227122331691296, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.3790968188105117, "calib/std_conf": 0.32499933248325935, "calib/step_conf_rate": 0.93359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1745.0, "completions/max_terminated_length": 1745.0, "completions/mean_length": 456.28515625, "completions/mean_terminated_length": 458.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.046933333333333334, "grad_norm": 0.005765858571976423, "learning_rate": 4.333333333333334e-06, "loss": -0.0189, "num_tokens": 9454349.0, "reward": 0.8806015253067017, "reward_std": 0.4581907093524933, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.5674974322319031, "rewards/format_reward_step": 0.89453125, "rewards/stepwise_brier_reward": 0.7045985460281372, "step": 44 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.48581151832460734, "calib/avg_num_step_conf": 3.33203125, "calib/ece": 0.3394438027953701, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.08298755186721991, "calib/gap": -0.013969121206944157, "calib/mean_conf": 0.4948034127538764, "calib/mu_c": 0.4837324494736841, "calib/mu_w": 0.49770157068062826, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.31338916794059835, "calib/std_conf": 0.3010887186963321, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2407.0, "completions/max_terminated_length": 2407.0, "completions/mean_length": 392.328125, "completions/mean_terminated_length": 392.328125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.048, "grad_norm": 0.006556249689310789, "learning_rate": 4.305555555555556e-06, "loss": -0.0063, "num_tokens": 9609401.0, "reward": 0.9578429460525513, "reward_std": 0.4669501483440399, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.6010839939117432, "rewards/format_reward_step": 0.91015625, "rewards/stepwise_brier_reward": 0.7401412725448608, "step": 45 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5820621468926553, "calib/avg_num_step_conf": 3.0390625, "calib/ece": 0.2903058577405858, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.8828125, "calib/frac_conf_gt_0.9": 0.100418410041841, "calib/gap": 0.09431298493408669, "calib/mean_conf": 0.4787150627615063, "calib/mu_c": 0.5497457627118645, "calib/mu_w": 0.4554327777777778, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.2610794979079498, "calib/std_conf": 0.31240829553709604, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2781.0, "completions/max_terminated_length": 2781.0, "completions/mean_length": 424.87890625, "completions/mean_terminated_length": 424.87890625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.04906666666666667, "grad_norm": 0.006347313988953829, "learning_rate": 4.277777777777778e-06, "loss": 0.016, "num_tokens": 9772506.0, "reward": 0.9869669079780579, "reward_std": 0.4626862406730652, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.6165705919265747, "rewards/format_reward_step": 0.8828125, "rewards/stepwise_brier_reward": 0.7334764003753662, "step": 46 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4998717510259918, "calib/avg_num_step_conf": 2.8125, "calib/ece": 0.3017594949537037, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.07083333333333333, "calib/gap": 0.006677570934032595, "calib/mean_conf": 0.43197911337962963, "calib/mu_c": 0.436764705882353, "calib/mu_w": 0.4300871349483204, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.22520263749999997, "calib/std_conf": 0.3029079213144214, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 420.1640625, "completions/mean_terminated_length": 421.8117980957031, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.050133333333333335, "grad_norm": 0.0059139239601790905, "learning_rate": 4.25e-06, "loss": 0.0216, "num_tokens": 9935612.0, "reward": 1.0119876861572266, "reward_std": 0.45886707305908203, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.5972976684570312, "rewards/format_reward_step": 0.875, "rewards/stepwise_brier_reward": 0.7096055746078491, "step": 47 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.46787658802177856, "calib/avg_num_step_conf": 3.1640625, "calib/ece": 0.31055860215053765, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.06854838709677419, "calib/gap": -0.032152534785238995, "calib/mean_conf": 0.4296674731182796, "calib/mu_c": 0.40503448275862075, "calib/mu_w": 0.43718701754385975, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.25317755376344087, "calib/std_conf": 0.29134742010503306, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 400.90625, "completions/mean_terminated_length": 402.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.0512, "grad_norm": 0.00578545406460762, "learning_rate": 4.222222222222223e-06, "loss": 0.0263, "num_tokens": 10091500.0, "reward": 1.0206351280212402, "reward_std": 0.38921114802360535, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.6308324933052063, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.7661881446838379, "step": 48 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5342419208698279, "calib/avg_num_step_conf": 3.265625, "calib/ece": 0.2022221817092933, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.040160642570281124, "calib/gap": 0.017430107211739654, "calib/mean_conf": 0.36531401571197075, "calib/mu_c": 0.3773540897698792, "calib/mu_w": 0.35992398255813957, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.12914962481504966, "calib/std_conf": 0.2741067596898534, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2104.0, "completions/max_terminated_length": 2104.0, "completions/mean_length": 398.07421875, "completions/mean_terminated_length": 398.07421875, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.05226666666666667, "grad_norm": 0.0057755582965910435, "learning_rate": 4.194444444444445e-06, "loss": -0.0006, "num_tokens": 10247511.0, "reward": 1.1383147239685059, "reward_std": 0.3906211256980896, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.6833595037460327, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8240399956703186, "step": 49 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5059739866908651, "calib/avg_num_step_conf": 2.75390625, "calib/ece": 0.22803238084848487, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.02, "calib/gap": 0.0008633480723753695, "calib/mean_conf": 0.3828160555151515, "calib/mu_c": 0.38341694577352475, "calib/mu_w": 0.3825535977011494, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.15342421818181817, "calib/std_conf": 0.2580277267496595, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2193.0, "completions/max_terminated_length": 2193.0, "completions/mean_length": 458.51953125, "completions/mean_terminated_length": 460.31768798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.05333333333333334, "grad_norm": 0.005000683479011059, "learning_rate": 4.166666666666667e-06, "loss": -0.0097, "num_tokens": 10419820.0, "reward": 1.1152260303497314, "reward_std": 0.3721734881401062, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.6713389158248901, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.7901011109352112, "step": 50 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48169112508735146, "calib/avg_num_step_conf": 2.8515625, "calib/ece": 0.2317144578313253, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": -0.024234863731656175, "calib/mean_conf": 0.349529718875502, "calib/mu_c": 0.3340544444444445, "calib/mu_w": 0.35828930817610066, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1098991967871486, "calib/std_conf": 0.24695792887627788, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1808.0, "completions/max_terminated_length": 1808.0, "completions/mean_length": 421.43359375, "completions/mean_terminated_length": 423.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.0544, "grad_norm": 0.005766298621892929, "learning_rate": 4.138888888888889e-06, "loss": 0.0076, "num_tokens": 10586571.0, "reward": 1.1778815984725952, "reward_std": 0.35190486907958984, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6635822653770447, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.807799220085144, "step": 51 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5847146739130435, "calib/avg_num_step_conf": 2.4140625, "calib/ece": 0.14099603174603176, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.0635904891304348, "calib/mean_conf": 0.29614682539682535, "calib/mu_c": 0.3365217391304348, "calib/mu_w": 0.27293125, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.036031746031746026, "calib/std_conf": 0.21578038114422565, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1771.0, "completions/max_terminated_length": 1771.0, "completions/mean_length": 463.85546875, "completions/mean_terminated_length": 463.85546875, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.055466666666666664, "grad_norm": 0.00548426853492856, "learning_rate": 4.111111111111111e-06, "loss": 0.0218, "num_tokens": 10762838.0, "reward": 1.2175707817077637, "reward_std": 0.39718616008758545, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.7146013379096985, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8504550457000732, "step": 52 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5517492711370263, "calib/avg_num_step_conf": 2.8359375, "calib/ece": 0.19429305555555557, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.05148232838589978, "calib/mean_conf": 0.32177837301587303, "calib/mu_c": 0.35323979591836735, "calib/mu_w": 0.30175746753246757, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.06359126984126984, "calib/std_conf": 0.20863787849712598, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1424.0, "completions/max_terminated_length": 1424.0, "completions/mean_length": 467.796875, "completions/mean_terminated_length": 469.63140869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.05653333333333333, "grad_norm": 0.005103487987071276, "learning_rate": 4.083333333333334e-06, "loss": -0.0497, "num_tokens": 10937986.0, "reward": 1.2299796342849731, "reward_std": 0.37857282161712646, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.7058714628219604, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8128631711006165, "step": 53 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.519959451901566, "calib/avg_num_step_conf": 2.625, "calib/ece": 0.20242142857142856, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": 0.010882672678970917, "calib/mean_conf": 0.2956602040816327, "calib/mu_c": 0.30227864583333336, "calib/mu_w": 0.29139597315436244, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.05312244897959184, "calib/std_conf": 0.20148572192781972, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1210.0, "completions/max_terminated_length": 1210.0, "completions/mean_length": 422.3046875, "completions/mean_terminated_length": 423.9608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 37.0, "epoch": 0.0576, "grad_norm": 0.005721841938793659, "learning_rate": 4.055555555555556e-06, "loss": -0.0556, "num_tokens": 11101896.0, "reward": 1.2086176872253418, "reward_std": 0.3556395173072815, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6776452660560608, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.8135550022125244, "step": 54 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6061990335246148, "calib/avg_num_step_conf": 2.640625, "calib/ece": 0.12081600475234272, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.06854713523985712, "calib/mean_conf": 0.2913125093038822, "calib/mu_c": 0.3386623376623377, "calib/mu_w": 0.2701152024224806, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.05144578313253013, "calib/std_conf": 0.19960611928322558, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2552.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 460.8828125, "completions/mean_terminated_length": 460.8828125, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "epoch": 0.058666666666666666, "grad_norm": 0.005817664321511984, "learning_rate": 4.027777777777779e-06, "loss": -0.0206, "num_tokens": 11277274.0, "reward": 1.162182331085205, "reward_std": 0.383277028799057, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7293254137039185, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.8416409492492676, "step": 55 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4731471135940409, "calib/avg_num_step_conf": 2.30078125, "calib/ece": 0.17838267716535436, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.02074724022346361, "calib/mean_conf": 0.25350708661417326, "calib/mu_c": 0.23888600000000007, "calib/mu_w": 0.2596332402234637, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.06830708661417324, "calib/std_conf": 0.19904568830411037, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1547.0, "completions/max_terminated_length": 1547.0, "completions/mean_length": 452.2421875, "completions/mean_terminated_length": 454.0157165527344, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.05973333333333333, "grad_norm": 0.0054603926837444305, "learning_rate": 4.000000000000001e-06, "loss": -0.0393, "num_tokens": 11449456.0, "reward": 1.1504205465316772, "reward_std": 0.33952397108078003, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7038324475288391, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8580799102783203, "step": 56 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5687116564417178, "calib/avg_num_step_conf": 2.39453125, "calib/ece": 0.16990777338603427, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.04262740286298575, "calib/mean_conf": 0.25973649538866933, "calib/mu_c": 0.2872, "calib/mu_w": 0.24457259713701426, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.03695652173913043, "calib/std_conf": 0.19891979247216385, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1169.0, "completions/max_terminated_length": 1169.0, "completions/mean_length": 433.8671875, "completions/mean_terminated_length": 435.56866455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.0608, "grad_norm": 0.005786188878118992, "learning_rate": 3.972222222222223e-06, "loss": -0.0085, "num_tokens": 11616886.0, "reward": 1.2042686939239502, "reward_std": 0.37658119201660156, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7032498717308044, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.8449498414993286, "step": 57 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.557451306054658, "calib/avg_num_step_conf": 2.296875, "calib/ece": 0.15416205533596838, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.038548203231164146, "calib/mean_conf": 0.2767470355731226, "calib/mu_c": 0.3040202702702703, "calib/mu_w": 0.26547206703910614, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0692094861660079, "calib/std_conf": 0.20202418661481644, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2153.0, "completions/max_terminated_length": 2153.0, "completions/mean_length": 492.640625, "completions/mean_terminated_length": 492.640625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.06186666666666667, "grad_norm": 0.005370710510760546, "learning_rate": 3.944444444444445e-06, "loss": 0.0023, "num_tokens": 11798890.0, "reward": 1.1722075939178467, "reward_std": 0.3359895348548889, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7481820583343506, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.864341139793396, "step": 58 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5563291139240506, "calib/avg_num_step_conf": 2.15625, "calib/ece": 0.17481159420289852, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.03129324894514768, "calib/mean_conf": 0.26945718050065876, "calib/mu_c": 0.28900000000000003, "calib/mu_w": 0.25770675105485236, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.03438735177865613, "calib/std_conf": 0.19475026301124918, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2568.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 495.70703125, "completions/mean_terminated_length": 497.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.06293333333333333, "grad_norm": 0.005067238584160805, "learning_rate": 3.916666666666667e-06, "loss": 0.0049, "num_tokens": 11981607.0, "reward": 1.2288419008255005, "reward_std": 0.3457253873348236, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.7050671577453613, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8583583831787109, "step": 59 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5622261174408414, "calib/avg_num_step_conf": 2.0078125, "calib/ece": 0.1927982283464567, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.039012691296433616, "calib/mean_conf": 0.2438159448818898, "calib/mu_c": 0.2688516483516484, "calib/mu_w": 0.22983895705521476, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.039173228346456694, "calib/std_conf": 0.183887057063849, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2320.0, "completions/max_terminated_length": 2320.0, "completions/mean_length": 488.7265625, "completions/mean_terminated_length": 488.7265625, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.064, "grad_norm": 0.0051851640455424786, "learning_rate": 3.88888888888889e-06, "loss": 0.0367, "num_tokens": 12165145.0, "reward": 1.229191780090332, "reward_std": 0.33426523208618164, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7230992317199707, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8768181800842285, "step": 60 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5192792338709677, "calib/avg_num_step_conf": 1.9140625, "calib/ece": 0.22458095238095233, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013787373991935525, "calib/mean_conf": 0.2974428571428572, "calib/mu_c": 0.30444596774193555, "calib/mu_w": 0.29065859375, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.014980158730158727, "calib/std_conf": 0.19797366304259614, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1870.0, "completions/max_terminated_length": 1870.0, "completions/mean_length": 472.359375, "completions/mean_terminated_length": 474.2117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.06506666666666666, "grad_norm": 0.00622786907479167, "learning_rate": 3.861111111111112e-06, "loss": 0.0099, "num_tokens": 12339701.0, "reward": 1.3256220817565918, "reward_std": 0.33889907598495483, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6663312911987305, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8557630777359009, "step": 61 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5501989150090416, "calib/avg_num_step_conf": 1.69140625, "calib/ece": 0.15170236220472444, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02735201446654595, "calib/mean_conf": 0.26299055118110237, "calib/mu_c": 0.2818354430379746, "calib/mu_w": 0.25448342857142864, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05183464566929135, "calib/std_conf": 0.1917988954313152, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2374.0, "completions/max_terminated_length": 2374.0, "completions/mean_length": 524.0390625, "completions/mean_terminated_length": 524.0390625, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.06613333333333334, "grad_norm": 0.004966284614056349, "learning_rate": 3.833333333333334e-06, "loss": 0.0214, "num_tokens": 12530503.0, "reward": 1.198150873184204, "reward_std": 0.3018414378166199, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.7484416961669922, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8800951242446899, "step": 62 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5499891563652135, "calib/avg_num_step_conf": 1.48046875, "calib/ece": 0.16085365853658537, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02294946866189551, "calib/mean_conf": 0.24890243902439027, "calib/mu_c": 0.2637356321839081, "calib/mu_w": 0.2407861635220126, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.028048780487804886, "calib/std_conf": 0.17309360758249012, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2724.0, "completions/max_terminated_length": 2724.0, "completions/mean_length": 542.71484375, "completions/mean_terminated_length": 546.9881591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.0672, "grad_norm": 0.005206855479627848, "learning_rate": 3.8055555555555556e-06, "loss": 0.0564, "num_tokens": 12727646.0, "reward": 1.1937217712402344, "reward_std": 0.34111452102661133, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6999675631523132, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.8546397686004639, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48933708567854906, "calib/avg_num_step_conf": 1.62890625, "calib/ece": 0.2630434782608695, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0034543464665415524, "calib/mean_conf": 0.2290513833992095, "calib/mu_c": 0.22727642276422766, "calib/mu_w": 0.2307307692307692, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0029644268774703555, "calib/std_conf": 0.1556412507684706, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1450.0, "completions/max_terminated_length": 1450.0, "completions/mean_length": 488.3828125, "completions/mean_terminated_length": 490.2980651855469, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.06826666666666667, "grad_norm": 0.0052140094339847565, "learning_rate": 3.777777777777778e-06, "loss": 0.0193, "num_tokens": 12906016.0, "reward": 1.3204822540283203, "reward_std": 0.3196646571159363, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6467587947845459, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8852865695953369, "step": 64 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.517916032470827, "calib/avg_num_step_conf": 1.65625, "calib/ece": 0.20532690157480313, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00885664408929479, "calib/mean_conf": 0.23859435826771655, "calib/mu_c": 0.24368518518518517, "calib/mu_w": 0.23482854109589038, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009362204724409452, "calib/std_conf": 0.15915199787491316, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 445.8984375, "completions/mean_terminated_length": 447.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.06933333333333333, "grad_norm": 0.0053698234260082245, "learning_rate": 3.7500000000000005e-06, "loss": -0.0045, "num_tokens": 13074758.0, "reward": 1.2875516414642334, "reward_std": 0.2792130708694458, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6870719194412231, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9073121547698975, "step": 65 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6001217285453438, "calib/avg_num_step_conf": 1.6015625, "calib/ece": 0.17750875, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.053684851896936475, "calib/mean_conf": 0.2036023611111111, "calib/mu_c": 0.23747494623655913, "calib/mu_w": 0.18379009433962265, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.006031746031746031, "calib/std_conf": 0.15583048856736414, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2930.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 541.546875, "completions/mean_terminated_length": 543.6705932617188, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.0704, "grad_norm": 0.0056963106617331505, "learning_rate": 3.7222222222222225e-06, "loss": 0.0118, "num_tokens": 13269314.0, "reward": 1.223191261291504, "reward_std": 0.3150969445705414, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.7096078395843506, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8672992587089539, "step": 66 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5755578755056767, "calib/avg_num_step_conf": 1.65234375, "calib/ece": 0.21422980392156862, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03657393971029624, "calib/mean_conf": 0.19622509803921567, "calib/mu_c": 0.21888659793814433, "calib/mu_w": 0.1823126582278481, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.015031372549019605, "calib/std_conf": 0.1574204142450045, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1028.0, "completions/max_terminated_length": 1028.0, "completions/mean_length": 495.73828125, "completions/mean_terminated_length": 497.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.07146666666666666, "grad_norm": 0.004854717291891575, "learning_rate": 3.694444444444445e-06, "loss": -0.0018, "num_tokens": 13450799.0, "reward": 1.2532745599746704, "reward_std": 0.24554133415222168, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.7047981023788452, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9113144874572754, "step": 67 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5240679565832941, "calib/avg_num_step_conf": 1.76953125, "calib/ece": 0.20631909448818894, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011602204543922356, "calib/mean_conf": 0.21667303149606304, "calib/mu_c": 0.21741758241758247, "calib/mu_w": 0.21625736196319023, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.032362204724409455, "calib/std_conf": 0.15226898440999392, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2408.0, "completions/max_terminated_length": 2408.0, "completions/mean_length": 479.2890625, "completions/mean_terminated_length": 479.2890625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.07253333333333334, "grad_norm": 0.005828273016959429, "learning_rate": 3.6666666666666666e-06, "loss": 0.0098, "num_tokens": 13627153.0, "reward": 1.2230072021484375, "reward_std": 0.28815874457359314, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7085984945297241, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8810821771621704, "step": 68 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5680162510507145, "calib/avg_num_step_conf": 1.4609375, "calib/ece": 0.14117647058823532, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04762293359484454, "calib/mean_conf": 0.19390196078431376, "calib/mu_c": 0.22602409638554222, "calib/mu_w": 0.17840116279069768, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0047941176470588254, "calib/std_conf": 0.14267239381513033, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1916.0, "completions/max_terminated_length": 1916.0, "completions/mean_length": 553.62890625, "completions/mean_terminated_length": 553.62890625, "completions/min_length": 207.0, "completions/min_terminated_length": 207.0, "epoch": 0.0736, "grad_norm": 0.005274781957268715, "learning_rate": 3.638888888888889e-06, "loss": 0.0347, "num_tokens": 13822946.0, "reward": 1.2179726362228394, "reward_std": 0.24709612131118774, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.7536987662315369, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8863679766654968, "step": 69 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5386532030686513, "calib/avg_num_step_conf": 1.57421875, "calib/ece": 0.22510019841269846, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01616616943151275, "calib/mean_conf": 0.18362996031746034, "calib/mu_c": 0.19331683168316838, "calib/mu_w": 0.17715066225165563, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.00396825396825397, "calib/std_conf": 0.13254462121225444, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2252.0, "completions/max_terminated_length": 2252.0, "completions/mean_length": 509.3359375, "completions/mean_terminated_length": 511.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.07466666666666667, "grad_norm": 0.005372477695345879, "learning_rate": 3.6111111111111115e-06, "loss": -0.0503, "num_tokens": 14009896.0, "reward": 1.246382236480713, "reward_std": 0.310563325881958, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6737847328186035, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.9020219445228577, "step": 70 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5616416480357713, "calib/avg_num_step_conf": 1.86328125, "calib/ece": 0.21790703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.028257361865218822, "calib/mean_conf": 0.18287421875000004, "calib/mu_c": 0.19998316831683172, "calib/mu_w": 0.1717258064516129, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003125, "calib/std_conf": 0.1332470140648643, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1373.0, "completions/max_terminated_length": 1373.0, "completions/mean_length": 522.53125, "completions/mean_terminated_length": 524.5804443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.07573333333333333, "grad_norm": 0.005132544785737991, "learning_rate": 3.5833333333333335e-06, "loss": 0.0063, "num_tokens": 14197640.0, "reward": 1.2799885272979736, "reward_std": 0.2574692368507385, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.7082030177116394, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9301106929779053, "step": 71 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5610800242702083, "calib/avg_num_step_conf": 1.93359375, "calib/ece": 0.1971456692913386, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02938009842917816, "calib/mean_conf": 0.18120078740157483, "calib/mu_c": 0.20005494505494503, "calib/mu_w": 0.17067484662576687, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.010039370078740154, "calib/std_conf": 0.14263077656865242, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 484.7734375, "completions/mean_terminated_length": 486.6745300292969, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.0768, "grad_norm": 0.005084159318357706, "learning_rate": 3.555555555555556e-06, "loss": -0.0099, "num_tokens": 14375718.0, "reward": 1.24046790599823, "reward_std": 0.282471239566803, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7184491157531738, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9218480587005615, "step": 72 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5334898665348492, "calib/avg_num_step_conf": 1.72265625, "calib/ece": 0.30389555686274505, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0090898781512605, "calib/mean_conf": 0.17375150196078434, "calib/mu_c": 0.17859943697478994, "calib/mu_w": 0.16950955882352944, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005490196078431373, "calib/std_conf": 0.12611939174361494, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1068.0, "completions/max_terminated_length": 1068.0, "completions/mean_length": 494.73828125, "completions/mean_terminated_length": 496.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.07786666666666667, "grad_norm": 0.0048870607279241085, "learning_rate": 3.5277777777777784e-06, "loss": -0.0065, "num_tokens": 14558971.0, "reward": 1.3024544715881348, "reward_std": 0.2780904769897461, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6398916244506836, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8894093632698059, "step": 73 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5445133853876185, "calib/avg_num_step_conf": 2.12109375, "calib/ece": 0.19820119521912347, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021587702175125484, "calib/mean_conf": 0.16195816733067728, "calib/mu_c": 0.17597727272727273, "calib/mu_w": 0.15438957055214725, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.004780876494023905, "calib/std_conf": 0.12229669381367947, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 560.17578125, "completions/mean_terminated_length": 560.17578125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.07893333333333333, "grad_norm": 0.005485473200678825, "learning_rate": 3.5e-06, "loss": 0.037, "num_tokens": 14755872.0, "reward": 1.210759162902832, "reward_std": 0.3039107322692871, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.7089241743087769, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8767507076263428, "step": 74 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5822851153039832, "calib/avg_num_step_conf": 2.14453125, "calib/ece": 0.4596764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02781348270440248, "calib/mean_conf": 0.16385294117647062, "calib/mu_c": 0.17432389937106918, "calib/mu_w": 0.1465104166666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.11003423237916028, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 514.09765625, "completions/mean_terminated_length": 516.11376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.08, "grad_norm": 0.005142589565366507, "learning_rate": 3.4722222222222224e-06, "loss": 0.0084, "num_tokens": 14941801.0, "reward": 1.414496660232544, "reward_std": 0.24992991983890533, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.5492589473724365, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8844687342643738, "step": 75 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5394428694687539, "calib/avg_num_step_conf": 2.42578125, "calib/ece": 0.37201381740196077, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.022696512830488097, "calib/mean_conf": 0.14955481004901963, "calib/mu_c": 0.16041353383458648, "calib/mu_w": 0.13771702100409838, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.11541540821168365, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2557.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 562.54296875, "completions/mean_terminated_length": 562.54296875, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.08106666666666666, "grad_norm": 0.006148222368210554, "learning_rate": 3.444444444444445e-06, "loss": 0.0345, "num_tokens": 15138436.0, "reward": 1.3471288681030273, "reward_std": 0.2481614351272583, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6013270616531372, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9217990636825562, "step": 76 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.45207688338493285, "calib/avg_num_step_conf": 2.203125, "calib/ece": 0.45151673228346456, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.023998690660474747, "calib/mean_conf": 0.16383759842519685, "calib/mu_c": 0.15420032894736843, "calib/mu_w": 0.17819901960784318, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.008464566929133856, "calib/std_conf": 0.12103179437546255, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 591.92578125, "completions/mean_terminated_length": 591.92578125, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.08213333333333334, "grad_norm": 0.005385115742683411, "learning_rate": 3.416666666666667e-06, "loss": 0.0317, "num_tokens": 15344201.0, "reward": 1.381176233291626, "reward_std": 0.262881875038147, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5356004238128662, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.897254467010498, "step": 77 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5417076771653543, "calib/avg_num_step_conf": 2.65625, "calib/ece": 0.34595058823529407, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019628457185039305, "calib/mean_conf": 0.1560101960784314, "calib/mu_c": 0.16578593750000004, "calib/mu_w": 0.14615748031496073, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.11908328332149824, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2025.0, "completions/max_terminated_length": 2025.0, "completions/mean_length": 632.24609375, "completions/mean_terminated_length": 634.7255249023438, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.0832, "grad_norm": 0.004347877111285925, "learning_rate": 3.3888888888888893e-06, "loss": -0.0022, "num_tokens": 15563648.0, "reward": 1.344900369644165, "reward_std": 0.2374875694513321, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6235101222991943, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9372687339782715, "step": 78 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5324177579048228, "calib/avg_num_step_conf": 2.2578125, "calib/ece": 0.4283722656249999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01839741296710315, "calib/mean_conf": 0.17709648437500003, "calib/mu_c": 0.18435483870967745, "calib/mu_w": 0.1659574257425743, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.12327377126928858, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1236.0, "completions/max_terminated_length": 1236.0, "completions/mean_length": 596.4453125, "completions/mean_terminated_length": 598.7843627929688, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.08426666666666667, "grad_norm": 0.004410876892507076, "learning_rate": 3.3611111111111117e-06, "loss": 0.014, "num_tokens": 15772282.0, "reward": 1.4093378782272339, "reward_std": 0.23596200346946716, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.5636845827102661, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8974823951721191, "step": 79 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5459320091673032, "calib/avg_num_step_conf": 2.40625, "calib/ece": 0.430138671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015111471861471854, "calib/mean_conf": 0.171423828125, "calib/mu_c": 0.1774448051948052, "calib/mu_w": 0.16233333333333336, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.12099904384708153, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1627.0, "completions/max_terminated_length": 1627.0, "completions/mean_length": 570.23828125, "completions/mean_terminated_length": 572.4745483398438, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.08533333333333333, "grad_norm": 0.004657160025089979, "learning_rate": 3.3333333333333333e-06, "loss": -0.0026, "num_tokens": 15969991.0, "reward": 1.3999781608581543, "reward_std": 0.2511942982673645, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.561883270740509, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8839585185050964, "step": 80 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5228434608166699, "calib/avg_num_step_conf": 2.41015625, "calib/ece": 0.41380065616797895, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010194984792596895, "calib/mean_conf": 0.19328595800524934, "calib/mu_c": 0.19733986928104577, "calib/mu_w": 0.18714488448844888, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.002362204724409449, "calib/std_conf": 0.12704253680914465, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 639.5703125, "completions/mean_terminated_length": 642.0784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.0864, "grad_norm": 0.004149497952312231, "learning_rate": 3.3055555555555558e-06, "loss": 0.022, "num_tokens": 16189537.0, "reward": 1.4040427207946777, "reward_std": 0.2714318037033081, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.5699639320373535, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9043678045272827, "step": 81 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47222570532915353, "calib/avg_num_step_conf": 2.53125, "calib/ece": 0.37285294117647066, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.017988244514106605, "calib/mean_conf": 0.22479411764705884, "calib/mu_c": 0.2170344827586207, "calib/mu_w": 0.2350227272727273, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.014509803921568625, "calib/std_conf": 0.1279360355063584, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1886.0, "completions/max_terminated_length": 1886.0, "completions/mean_length": 587.3046875, "completions/mean_terminated_length": 589.6078491210938, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.08746666666666666, "grad_norm": 0.004640071652829647, "learning_rate": 3.277777777777778e-06, "loss": 0.0542, "num_tokens": 16395007.0, "reward": 1.3727104663848877, "reward_std": 0.26982933282852173, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5919941067695618, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8693534731864929, "step": 82 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6083211143695014, "calib/avg_num_step_conf": 2.296875, "calib/ece": 0.30937421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.045839247311827896, "calib/mean_conf": 0.21093828125000003, "calib/mu_c": 0.23314166666666666, "calib/mu_w": 0.18730241935483877, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00234375, "calib/std_conf": 0.13373396902823867, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1952.0, "completions/max_terminated_length": 1952.0, "completions/mean_length": 671.09765625, "completions/mean_terminated_length": 673.7294311523438, "completions/min_length": 0.0, "completions/min_terminated_length": 251.0, "epoch": 0.08853333333333334, "grad_norm": 0.004066772758960724, "learning_rate": 3.2500000000000002e-06, "loss": 0.016, "num_tokens": 16623640.0, "reward": 1.378986120223999, "reward_std": 0.2268155962228775, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6610163450241089, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.936099648475647, "step": 83 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5661490683229813, "calib/avg_num_step_conf": 2.19921875, "calib/ece": 0.32683333333333336, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03280822981366452, "calib/mean_conf": 0.2221862745098039, "calib/mu_c": 0.2369821428571428, "calib/mu_w": 0.2041739130434783, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.13624174595557223, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1615.0, "completions/max_terminated_length": 1615.0, "completions/mean_length": 593.65234375, "completions/mean_terminated_length": 595.9804077148438, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.0896, "grad_norm": 0.004806416109204292, "learning_rate": 3.2222222222222227e-06, "loss": 0.0066, "num_tokens": 16831103.0, "reward": 1.3885453939437866, "reward_std": 0.2803252935409546, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6362625360488892, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9082188010215759, "step": 84 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5601453855878634, "calib/avg_num_step_conf": 2.36328125, "calib/ece": 0.21920688405797098, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020185141698272208, "calib/mean_conf": 0.24956781949934123, "calib/mu_c": 0.26073746312684365, "calib/mu_w": 0.24055232142857144, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.01106719367588933, "calib/std_conf": 0.14309457774759896, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2811.0, "completions/max_terminated_length": 2811.0, "completions/mean_length": 624.90234375, "completions/mean_terminated_length": 627.3529663085938, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.09066666666666667, "grad_norm": 0.0047371708787977695, "learning_rate": 3.1944444444444443e-06, "loss": 0.0222, "num_tokens": 17048470.0, "reward": 1.3084673881530762, "reward_std": 0.2495461404323578, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6849251985549927, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9155815839767456, "step": 85 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5725414433503677, "calib/avg_num_step_conf": 2.2109375, "calib/ece": 0.21341450980392157, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.028003184594291325, "calib/mean_conf": 0.2587423529411765, "calib/mu_c": 0.2743362831858407, "calib/mu_w": 0.24633309859154937, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.014509803921568629, "calib/std_conf": 0.13502763061595127, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1672.0, "completions/max_terminated_length": 1672.0, "completions/mean_length": 624.00390625, "completions/mean_terminated_length": 626.4509887695312, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.09173333333333333, "grad_norm": 0.005471901968121529, "learning_rate": 3.1666666666666667e-06, "loss": 0.0116, "num_tokens": 17263295.0, "reward": 1.313936471939087, "reward_std": 0.2574613690376282, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6988636255264282, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9158311486244202, "step": 86 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5514144125542021, "calib/avg_num_step_conf": 1.90625, "calib/ece": 0.39824015748031494, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026239039163053146, "calib/mean_conf": 0.259240157480315, "calib/mu_c": 0.26822754491017964, "calib/mu_w": 0.2419885057471265, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.12253291941406065, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2744.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 540.47265625, "completions/mean_terminated_length": 540.47265625, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.0928, "grad_norm": 0.004846483934670687, "learning_rate": 3.138888888888889e-06, "loss": 0.0196, "num_tokens": 17456720.0, "reward": 1.4581712484359741, "reward_std": 0.25667858123779297, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.5915886163711548, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8729454874992371, "step": 87 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5230451366815003, "calib/avg_num_step_conf": 2.203125, "calib/ece": 0.3017470355731225, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019502797202797184, "calib/mean_conf": 0.26663241106719365, "calib/mu_c": 0.2751118881118881, "calib/mu_w": 0.2556090909090909, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0015810276679841919, "calib/std_conf": 0.14371236663911982, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2261.0, "completions/max_terminated_length": 2261.0, "completions/mean_length": 654.75390625, "completions/mean_terminated_length": 654.75390625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.09386666666666667, "grad_norm": 0.0045557511039078236, "learning_rate": 3.1111111111111116e-06, "loss": 0.0455, "num_tokens": 17683753.0, "reward": 1.395997166633606, "reward_std": 0.2871512472629547, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6374142169952393, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8872852921485901, "step": 88 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.555745341614907, "calib/avg_num_step_conf": 2.046875, "calib/ece": 0.19541455384313725, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025722160959627316, "calib/mean_conf": 0.2751736814509804, "calib/mu_c": 0.289295652173913, "calib/mu_w": 0.2635734912142857, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009803921568627449, "calib/std_conf": 0.1283167888660835, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1837.0, "completions/max_terminated_length": 1837.0, "completions/mean_length": 628.22265625, "completions/mean_terminated_length": 630.6863403320312, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.09493333333333333, "grad_norm": 0.004618597216904163, "learning_rate": 3.0833333333333336e-06, "loss": 0.0243, "num_tokens": 17903034.0, "reward": 1.3240776062011719, "reward_std": 0.22102946043014526, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.709104061126709, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8952895402908325, "step": 89 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.535424836601307, "calib/avg_num_step_conf": 2.0234375, "calib/ece": 0.3402944664031621, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010287254901960863, "calib/mean_conf": 0.28757114624505936, "calib/mu_c": 0.29163725490196085, "calib/mu_w": 0.28135, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.011561264822134388, "calib/std_conf": 0.13380067056685727, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2446.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 613.71875, "completions/mean_terminated_length": 613.71875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.096, "grad_norm": 0.005011430941522121, "learning_rate": 3.055555555555556e-06, "loss": 0.0563, "num_tokens": 18113034.0, "reward": 1.4468196630477905, "reward_std": 0.23396410048007965, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6398017406463623, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9154877662658691, "step": 90 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5235210504941112, "calib/avg_num_step_conf": 2.03515625, "calib/ece": 0.35872470588235295, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021795234872072522, "calib/mean_conf": 0.2922556862745099, "calib/mu_c": 0.29986265060240963, "calib/mu_w": 0.2780674157303371, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.12715683799174327, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1583.0, "completions/max_terminated_length": 1583.0, "completions/mean_length": 605.078125, "completions/mean_terminated_length": 607.4509887695312, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.09706666666666666, "grad_norm": 0.00484122522175312, "learning_rate": 3.0277777777777776e-06, "loss": -0.0012, "num_tokens": 18325214.0, "reward": 1.488823413848877, "reward_std": 0.2453734278678894, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6353552341461182, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8955209255218506, "step": 91 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5746240243670284, "calib/avg_num_step_conf": 1.984375, "calib/ece": 0.30334374999999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02283469763309859, "calib/mean_conf": 0.303453125, "calib/mu_c": 0.312640522875817, "calib/mu_w": 0.2898058252427184, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0045703125, "calib/std_conf": 0.11596806264111846, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 553.953125, "completions/mean_terminated_length": 556.1255493164062, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.09813333333333334, "grad_norm": 0.005605071783065796, "learning_rate": 3e-06, "loss": -0.0133, "num_tokens": 18523314.0, "reward": 1.4612683057785034, "reward_std": 0.21096697449684143, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6705144643783569, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9134193062782288, "step": 92 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5141875, "calib/avg_num_step_conf": 1.859375, "calib/ece": 0.22534505928853754, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009557249999999073, "calib/mean_conf": 0.29639407114624505, "calib/mu_c": 0.2968775999999999, "calib/mu_w": 0.295921875, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.01383399209486166, "calib/std_conf": 0.10620626886336024, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2470.0, "completions/max_terminated_length": 2470.0, "completions/mean_length": 606.7578125, "completions/mean_terminated_length": 606.7578125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.0992, "grad_norm": 0.004706274252384901, "learning_rate": 2.9722222222222225e-06, "loss": 0.0151, "num_tokens": 18733988.0, "reward": 1.3505432605743408, "reward_std": 0.26323801279067993, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6854676008224487, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9015504717826843, "step": 93 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5609452736318409, "calib/avg_num_step_conf": 1.8046875, "calib/ece": 0.22904621513944226, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023001575456053025, "calib/mean_conf": 0.3112908366533865, "calib/mu_c": 0.32201268656716414, "calib/mu_w": 0.2990111111111111, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0032362549800796815, "calib/std_conf": 0.11176713392938932, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2890.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 613.76953125, "completions/mean_terminated_length": 613.76953125, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.10026666666666667, "grad_norm": 0.005544749088585377, "learning_rate": 2.944444444444445e-06, "loss": 0.0675, "num_tokens": 18949361.0, "reward": 1.3829312324523926, "reward_std": 0.30212563276290894, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6831309795379639, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8998380303382874, "step": 94 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5020877518557795, "calib/avg_num_step_conf": 1.8984375, "calib/ece": 0.3291289062500001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.012245758218451752, "calib/mean_conf": 0.32258984375, "calib/mu_c": 0.3181890243902439, "calib/mu_w": 0.33043478260869563, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005546875, "calib/std_conf": 0.102866416971724, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1538.0, "completions/max_terminated_length": 1538.0, "completions/mean_length": 585.6328125, "completions/mean_terminated_length": 587.929443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.10133333333333333, "grad_norm": 0.005202965810894966, "learning_rate": 2.916666666666667e-06, "loss": 0.0122, "num_tokens": 19154979.0, "reward": 1.493459701538086, "reward_std": 0.2513244152069092, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6524089574813843, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9065208435058594, "step": 95 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6400238473767886, "calib/avg_num_step_conf": 1.78125, "calib/ece": 0.40860606060606064, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04988881822999475, "calib/mean_conf": 0.3273623188405797, "calib/mu_c": 0.34077117117117117, "calib/mu_w": 0.2908823529411764, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0023715415019762848, "calib/std_conf": 0.11390679160950982, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2532.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 565.25, "completions/mean_terminated_length": 565.25, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 0.1024, "grad_norm": 0.006645775865763426, "learning_rate": 2.888888888888889e-06, "loss": 0.094, "num_tokens": 19355067.0, "reward": 1.5457134246826172, "reward_std": 0.2587122321128845, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.6358581781387329, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8392626643180847, "step": 96 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6319811320754717, "calib/avg_num_step_conf": 1.75390625, "calib/ece": 0.26446875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.047192452830188636, "calib/mean_conf": 0.326859375, "calib/mu_c": 0.3464, "calib/mu_w": 0.29920754716981135, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002695312500000002, "calib/std_conf": 0.11486052917390453, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 554.2578125, "completions/mean_terminated_length": 556.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.10346666666666667, "grad_norm": 0.006527770310640335, "learning_rate": 2.861111111111111e-06, "loss": 0.0134, "num_tokens": 19551597.0, "reward": 1.4724822044372559, "reward_std": 0.24671298265457153, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.699970006942749, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.94623863697052, "step": 97 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5448076923076923, "calib/avg_num_step_conf": 1.5703125, "calib/ece": 0.25709514566929137, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005448144230769225, "calib/mean_conf": 0.3551292637795275, "calib/mu_c": 0.35736, "calib/mu_w": 0.3519118557692308, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01083661417322835, "calib/std_conf": 0.1257529834772536, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2692.0, "completions/max_terminated_length": 2692.0, "completions/mean_length": 591.34375, "completions/mean_terminated_length": 593.6627807617188, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.10453333333333334, "grad_norm": 0.005861423909664154, "learning_rate": 2.8333333333333335e-06, "loss": 0.0089, "num_tokens": 19758733.0, "reward": 1.4559632539749146, "reward_std": 0.2572120726108551, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6842094659805298, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.905434250831604, "step": 98 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5618983442433576, "calib/avg_num_step_conf": 1.66015625, "calib/ece": 0.1394718342848777, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010118537296120445, "calib/mean_conf": 0.342750095154443, "calib/mu_c": 0.3486292452830189, "calib/mu_w": 0.3385107079868985, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.03162479871175524, "calib/std_conf": 0.11997389444582301, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1862.0, "completions/max_terminated_length": 1862.0, "completions/mean_length": 663.7421875, "completions/mean_terminated_length": 668.968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.1056, "grad_norm": 0.005669788923114538, "learning_rate": 2.805555555555556e-06, "loss": 0.001, "num_tokens": 19984019.0, "reward": 1.3039848804473877, "reward_std": 0.32023513317108154, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.7289392352104187, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9205611944198608, "step": 99 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5417953667953668, "calib/avg_num_step_conf": 1.796875, "calib/ece": 0.2451225296442688, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0061968468468468485, "calib/mean_conf": 0.3536916996047431, "calib/mu_c": 0.35626351351351354, "calib/mu_w": 0.3500666666666667, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.006916996047430832, "calib/std_conf": 0.11269830923059786, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2582.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 647.5546875, "completions/mean_terminated_length": 647.5546875, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 0.10666666666666667, "grad_norm": 0.005725045222789049, "learning_rate": 2.7777777777777783e-06, "loss": 0.0159, "num_tokens": 20206769.0, "reward": 1.4439170360565186, "reward_std": 0.25288286805152893, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6800872087478638, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9264310598373413, "step": 100 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4667576283800546, "calib/avg_num_step_conf": 1.6953125, "calib/ece": 0.1910352941176471, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0248276482262465, "calib/mean_conf": 0.3417490196078431, "calib/mu_c": 0.3282155172413794, "calib/mu_w": 0.3530431654676259, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03894117647058824, "calib/std_conf": 0.10193992727851357, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2630.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 653.72265625, "completions/mean_terminated_length": 653.72265625, "completions/min_length": 244.0, "completions/min_terminated_length": 244.0, "epoch": 0.10773333333333333, "grad_norm": 0.00673833629116416, "learning_rate": 2.7500000000000004e-06, "loss": -0.0, "num_tokens": 20430682.0, "reward": 1.3407317399978638, "reward_std": 0.2800105810165405, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.7115739583969116, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9366538524627686, "step": 101 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5668928571428571, "calib/avg_num_step_conf": 1.64453125, "calib/ece": 0.33908392156862743, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010896642857142869, "calib/mean_conf": 0.34719058823529414, "calib/mu_c": 0.35060914285714284, "calib/mu_w": 0.3397125, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.104142325640683, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1304.0, "completions/max_terminated_length": 1304.0, "completions/mean_length": 515.89453125, "completions/mean_terminated_length": 517.9176635742188, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.1088, "grad_norm": 0.007361991330981255, "learning_rate": 2.7222222222222224e-06, "loss": 0.01, "num_tokens": 20619015.0, "reward": 1.541640043258667, "reward_std": 0.17958354949951172, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6573126316070557, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9113100171089172, "step": 102 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5523360867320599, "calib/avg_num_step_conf": 1.66015625, "calib/ece": 0.23990632411067192, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008410286525554866, "calib/mean_conf": 0.36594347826086954, "calib/mu_c": 0.3694006711409395, "calib/mu_w": 0.36099038461538463, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.008458498023715419, "calib/std_conf": 0.1126632795205633, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 676.81640625, "completions/mean_terminated_length": 679.4706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.10986666666666667, "grad_norm": 0.005382324568927288, "learning_rate": 2.6944444444444444e-06, "loss": 0.0548, "num_tokens": 20846400.0, "reward": 1.440127968788147, "reward_std": 0.23319774866104126, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6840233206748962, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8924654722213745, "step": 103 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5241526717557251, "calib/avg_num_step_conf": 1.62109375, "calib/ece": 0.1861375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0061585954198473125, "calib/mean_conf": 0.36055546875, "calib/mu_c": 0.357404, "calib/mu_w": 0.3635625954198473, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.029205859375000007, "calib/std_conf": 0.13196414570383416, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 586.37109375, "completions/mean_terminated_length": 588.6705932617188, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.11093333333333333, "grad_norm": 0.006029161624610424, "learning_rate": 2.666666666666667e-06, "loss": 0.0231, "num_tokens": 21052759.0, "reward": 1.3788419961929321, "reward_std": 0.2469767928123474, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7133313417434692, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9355801939964294, "step": 104 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5728983136169142, "calib/avg_num_step_conf": 1.46875, "calib/ece": 0.20540316205533596, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019087465391392, "calib/mean_conf": 0.37214624505928856, "calib/mu_c": 0.3808978102189782, "calib/mu_w": 0.36181034482758617, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.018023715415019768, "calib/std_conf": 0.12139836733918537, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2568.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 619.8359375, "completions/mean_terminated_length": 622.2667236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 246.0, "epoch": 0.112, "grad_norm": 0.006988944485783577, "learning_rate": 2.6388888888888893e-06, "loss": 0.0399, "num_tokens": 21266765.0, "reward": 1.396331548690796, "reward_std": 0.281770795583725, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6915109753608704, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8882417678833008, "step": 105 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.549142892892893, "calib/avg_num_step_conf": 1.484375, "calib/ece": 0.19350313725490204, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013794650900900873, "calib/mean_conf": 0.4202223529411765, "calib/mu_c": 0.4262270833333333, "calib/mu_w": 0.41243243243243244, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.024509803921568672, "calib/std_conf": 0.14519558523224546, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1682.0, "completions/max_terminated_length": 1682.0, "completions/mean_length": 577.1171875, "completions/mean_terminated_length": 579.3804321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.11306666666666666, "grad_norm": 0.0071154567413032055, "learning_rate": 2.6111111111111113e-06, "loss": -0.0393, "num_tokens": 21468659.0, "reward": 1.4477204084396362, "reward_std": 0.2633710205554962, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7140500545501709, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9221565127372742, "step": 106 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5438095238095237, "calib/avg_num_step_conf": 1.5234375, "calib/ece": 0.16790849673202618, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023317460317460292, "calib/mean_conf": 0.4320653594771241, "calib/mu_c": 0.4416666666666666, "calib/mu_w": 0.4183492063492063, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005869281045751641, "calib/std_conf": 0.1471456975062049, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2762.0, "completions/max_terminated_length": 2762.0, "completions/mean_length": 572.26953125, "completions/mean_terminated_length": 572.26953125, "completions/min_length": 198.0, "completions/min_terminated_length": 198.0, "epoch": 0.11413333333333334, "grad_norm": 0.008440433070063591, "learning_rate": 2.5833333333333337e-06, "loss": 0.0671, "num_tokens": 21669344.0, "reward": 1.4780728816986084, "reward_std": 0.2975276708602905, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7202157974243164, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9327974319458008, "step": 107 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5260141093474426, "calib/avg_num_step_conf": 1.4609375, "calib/ece": 0.30506797385620915, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017657206990540353, "calib/mean_conf": 0.44748104575163394, "calib/mu_c": 0.45205114638447974, "calib/mu_w": 0.4343939393939394, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005686274509803921, "calib/std_conf": 0.15370237052146365, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2831.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 637.484375, "completions/mean_terminated_length": 637.484375, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.1152, "grad_norm": 0.006027248688042164, "learning_rate": 2.5555555555555557e-06, "loss": 0.0228, "num_tokens": 21885340.0, "reward": 1.6121673583984375, "reward_std": 0.2332625687122345, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.702305018901825, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.895621657371521, "step": 108 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5266529183852018, "calib/avg_num_step_conf": 1.328125, "calib/ece": 0.15022529644268773, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00822453443319593, "calib/mean_conf": 0.46661264822134385, "calib/mu_c": 0.47070866141732287, "calib/mu_w": 0.46248412698412694, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.057430830039525704, "calib/std_conf": 0.15664296958074397, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1922.0, "completions/max_terminated_length": 1922.0, "completions/mean_length": 589.1328125, "completions/mean_terminated_length": 593.7716674804688, "completions/min_length": 0.0, "completions/min_terminated_length": 244.0, "epoch": 0.11626666666666667, "grad_norm": 0.0065979138016700745, "learning_rate": 2.5277777777777778e-06, "loss": -0.0103, "num_tokens": 22090326.0, "reward": 1.3793652057647705, "reward_std": 0.21767696738243103, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7197933793067932, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9075617790222168, "step": 109 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5017532467532467, "calib/avg_num_step_conf": 1.31640625, "calib/ece": 0.1660236220472441, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0028753246753246975, "calib/mean_conf": 0.49854330708661415, "calib/mu_c": 0.4996753246753247, "calib/mu_w": 0.4968, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.029133858267716542, "calib/std_conf": 0.1583895455025244, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1559.0, "completions/max_terminated_length": 1559.0, "completions/mean_length": 549.453125, "completions/mean_terminated_length": 553.779541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.11733333333333333, "grad_norm": 0.006974409334361553, "learning_rate": 2.5e-06, "loss": -0.0105, "num_tokens": 22285474.0, "reward": 1.4861176013946533, "reward_std": 0.3244120478630066, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7152194976806641, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9265310168266296, "step": 110 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5258145363408521, "calib/avg_num_step_conf": 1.24609375, "calib/ece": 0.13496062992125984, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01481954887218051, "calib/mean_conf": 0.5160629921259842, "calib/mu_c": 0.5227142857142858, "calib/mu_w": 0.5078947368421053, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.049921259842519695, "calib/std_conf": 0.1563893912683447, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2762.0, "completions/max_terminated_length": 2762.0, "completions/mean_length": 578.85546875, "completions/mean_terminated_length": 581.1255493164062, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.1184, "grad_norm": 0.008550817146897316, "learning_rate": 2.4722222222222226e-06, "loss": 0.0042, "num_tokens": 22490637.0, "reward": 1.4284076690673828, "reward_std": 0.3240845203399658, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7224196195602417, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9000416994094849, "step": 111 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5529933196300103, "calib/avg_num_step_conf": 1.23046875, "calib/ece": 0.11254980079681276, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03227774922918819, "calib/mean_conf": 0.4994820717131474, "calib/mu_c": 0.513884892086331, "calib/mu_w": 0.48160714285714284, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.029123505976095664, "calib/std_conf": 0.1548014107500639, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2952.0, "completions/max_terminated_length": 2952.0, "completions/mean_length": 628.75390625, "completions/mean_terminated_length": 631.2196655273438, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.11946666666666667, "grad_norm": 0.005662142299115658, "learning_rate": 2.4444444444444447e-06, "loss": 0.048, "num_tokens": 22709086.0, "reward": 1.4255472421646118, "reward_std": 0.3332613706588745, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7239608764648438, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8948922753334045, "step": 112 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4897715539494063, "calib/avg_num_step_conf": 1.16015625, "calib/ece": 0.16262845849802376, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006113835828600811, "calib/mean_conf": 0.5369762845849803, "calib/mu_c": 0.5344630872483223, "calib/mu_w": 0.5405769230769231, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0553359683794467, "calib/std_conf": 0.14319907723921993, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2745.0, "completions/max_terminated_length": 2745.0, "completions/mean_length": 541.96875, "completions/mean_terminated_length": 544.0941772460938, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.12053333333333334, "grad_norm": 0.008202227763831615, "learning_rate": 2.4166666666666667e-06, "loss": 0.0357, "num_tokens": 22902598.0, "reward": 1.4649397134780884, "reward_std": 0.30794620513916016, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7202385663986206, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8942817449569702, "step": 113 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5605185185185186, "calib/avg_num_step_conf": 1.1015625, "calib/ece": 0.1693333333333333, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03458888888888889, "calib/mean_conf": 0.5365490196078431, "calib/mu_c": 0.5467222222222222, "calib/mu_w": 0.5121333333333333, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.13868801406082962, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2582.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 537.8203125, "completions/mean_terminated_length": 537.8203125, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 0.1216, "grad_norm": 0.010839417576789856, "learning_rate": 2.388888888888889e-06, "loss": 0.0086, "num_tokens": 23094872.0, "reward": 1.5901761054992676, "reward_std": 0.2540254592895508, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7430562376976013, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8855295777320862, "step": 114 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4970914041710502, "calib/avg_num_step_conf": 1.0703125, "calib/ece": 0.11390625000000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004249025310972154, "calib/mean_conf": 0.550546875, "calib/mu_c": 0.5486713286713287, "calib/mu_w": 0.5529203539823009, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05292968750000006, "calib/std_conf": 0.14277554824876132, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1102.0, "completions/max_terminated_length": 1102.0, "completions/mean_length": 506.5234375, "completions/mean_terminated_length": 508.50982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.12266666666666666, "grad_norm": 0.00830326322466135, "learning_rate": 2.361111111111111e-06, "loss": 0.0223, "num_tokens": 23279374.0, "reward": 1.4463419914245605, "reward_std": 0.30777207016944885, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7279585599899292, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.899763286113739, "step": 115 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4285850738665308, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.17273333333333335, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.018333036169128736, "calib/mean_conf": 0.583086274509804, "calib/mu_c": 0.5756092715231789, "calib/mu_w": 0.5939423076923076, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.08183137254901961, "calib/std_conf": 0.12283162558145794, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1589.0, "completions/max_terminated_length": 1589.0, "completions/mean_length": 570.51953125, "completions/mean_terminated_length": 572.7568969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.12373333333333333, "grad_norm": 0.006899895146489143, "learning_rate": 2.3333333333333336e-06, "loss": 0.0239, "num_tokens": 23479515.0, "reward": 1.4818896055221558, "reward_std": 0.2726801335811615, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7259652614593506, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.914690375328064, "step": 116 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5177538461538462, "calib/avg_num_step_conf": 1.05078125, "calib/ece": 0.10890196078431374, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018716923076923186, "calib/mean_conf": 0.5710980392156862, "calib/mu_c": 0.5806399999999999, "calib/mu_w": 0.5619230769230767, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09490196078431376, "calib/std_conf": 0.13763966238995937, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1486.0, "completions/max_terminated_length": 1486.0, "completions/mean_length": 537.26171875, "completions/mean_terminated_length": 539.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.1248, "grad_norm": 0.010562032461166382, "learning_rate": 2.305555555555556e-06, "loss": 0.0179, "num_tokens": 23673222.0, "reward": 1.3744449615478516, "reward_std": 0.3122226893901825, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7230820655822754, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9172408580780029, "step": 117 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5072554996024383, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.08763492063492065, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01569202226345079, "calib/mean_conf": 0.6142222222222222, "calib/mu_c": 0.6203246753246753, "calib/mu_w": 0.6046326530612245, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.045373015873015884, "calib/std_conf": 0.12737402048092236, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 546.9609375, "completions/mean_terminated_length": 551.2677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.12586666666666665, "grad_norm": 0.008186553604900837, "learning_rate": 2.277777777777778e-06, "loss": -0.0255, "num_tokens": 23866820.0, "reward": 1.4868626594543457, "reward_std": 0.28711938858032227, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7340095043182373, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9059938192367554, "step": 118 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4986163522012579, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.10252734374999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013483647798742227, "calib/mean_conf": 0.6026835937499999, "calib/mu_c": 0.6082666666666667, "calib/mu_w": 0.5947830188679245, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.05963671874999997, "calib/std_conf": 0.1323059512298482, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1792.0, "completions/max_terminated_length": 1792.0, "completions/mean_length": 580.6953125, "completions/mean_terminated_length": 582.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.12693333333333334, "grad_norm": 0.006644046399742365, "learning_rate": 2.25e-06, "loss": 0.036, "num_tokens": 24070110.0, "reward": 1.4794600009918213, "reward_std": 0.3241386413574219, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.735337495803833, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9174776077270508, "step": 119 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6072461817937159, "calib/avg_num_step_conf": 1.0703125, "calib/ece": 0.06495312499999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0679617842965754, "calib/mean_conf": 0.5873906249999999, "calib/mu_c": 0.6110179640718564, "calib/mu_w": 0.543056179775281, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.14516169510104718, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1554.0, "completions/max_terminated_length": 1554.0, "completions/mean_length": 516.69140625, "completions/mean_terminated_length": 518.7176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.128, "grad_norm": 0.009012932889163494, "learning_rate": 2.222222222222222e-06, "loss": -0.0092, "num_tokens": 24258639.0, "reward": 1.5737022161483765, "reward_std": 0.2422880381345749, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7758143544197083, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9384927749633789, "step": 120 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5292538915727322, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.042073622047244165, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006989828234031048, "calib/mean_conf": 0.6238059055118111, "calib/mu_c": 0.6263376543209876, "calib/mu_w": 0.6193478260869566, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.01404212598425204, "calib/std_conf": 0.107397666383644, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2254.0, "completions/max_terminated_length": 2254.0, "completions/mean_length": 584.8203125, "completions/mean_terminated_length": 589.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.12906666666666666, "grad_norm": 0.009193326346576214, "learning_rate": 2.1944444444444445e-06, "loss": 0.0392, "num_tokens": 24462977.0, "reward": 1.533304214477539, "reward_std": 0.3090783357620239, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7494633197784424, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9217903017997742, "step": 121 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5856741573033708, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.09127865612648213, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08722732255412435, "calib/mean_conf": 0.6137055335968379, "calib/mu_c": 0.644390243902439, "calib/mu_w": 0.5571629213483147, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.028381422924901187, "calib/std_conf": 0.13510463390930255, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2641.0, "completions/max_terminated_length": 2641.0, "completions/mean_length": 572.28125, "completions/mean_terminated_length": 579.0671997070312, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.13013333333333332, "grad_norm": 0.010109186172485352, "learning_rate": 2.166666666666667e-06, "loss": -0.0212, "num_tokens": 24666393.0, "reward": 1.5562331676483154, "reward_std": 0.3059636354446411, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.783021092414856, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.910453200340271, "step": 122 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5110211869982891, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.09377290836653387, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005591064613765018, "calib/mean_conf": 0.630211155378486, "calib/mu_c": 0.6324832214765101, "calib/mu_w": 0.6268921568627451, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.06517928286852591, "calib/std_conf": 0.11383390795653611, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1743.0, "completions/max_terminated_length": 1743.0, "completions/mean_length": 615.42578125, "completions/mean_terminated_length": 625.1944580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.1312, "grad_norm": 0.011224367655813694, "learning_rate": 2.138888888888889e-06, "loss": -0.0461, "num_tokens": 24878798.0, "reward": 1.4726450443267822, "reward_std": 0.3578389883041382, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7325735092163086, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9051204919815063, "step": 123 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5085185185185185, "calib/avg_num_step_conf": 1.0703125, "calib/ece": 0.07579098039215706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0038219191919192097, "calib/mean_conf": 0.6290458823529413, "calib/mu_c": 0.6276969696969698, "calib/mu_w": 0.631518888888889, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.028889019607843347, "calib/std_conf": 0.12206579314285393, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1656.0, "completions/max_terminated_length": 1656.0, "completions/mean_length": 566.50390625, "completions/mean_terminated_length": 568.7255249023438, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.13226666666666667, "grad_norm": 0.010063174180686474, "learning_rate": 2.1111111111111114e-06, "loss": -0.0046, "num_tokens": 25080207.0, "reward": 1.5498743057250977, "reward_std": 0.2400267869234085, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7517082691192627, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9226433038711548, "step": 124 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5515110334562868, "calib/avg_num_step_conf": 1.0390625, "calib/ece": 0.0782782283464567, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03312156150909196, "calib/mean_conf": 0.6342152362204725, "calib/mu_c": 0.6473856209150327, "calib/mu_w": 0.6142640594059408, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.055065629921259865, "calib/std_conf": 0.12405846288079574, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2065.0, "completions/max_terminated_length": 2065.0, "completions/mean_length": 560.87109375, "completions/mean_terminated_length": 565.2874145507812, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.13333333333333333, "grad_norm": 0.010898683220148087, "learning_rate": 2.0833333333333334e-06, "loss": -0.0144, "num_tokens": 25278166.0, "reward": 1.5032649040222168, "reward_std": 0.30852293968200684, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.754002571105957, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9238045811653137, "step": 125 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5038186745503819, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.17431372549019622, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00893754619364373, "calib/mean_conf": 0.6475686274509805, "calib/mu_c": 0.6521951219512196, "calib/mu_w": 0.6432575757575759, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16976470588235307, "calib/std_conf": 0.09208462750973315, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 559.76953125, "completions/mean_terminated_length": 559.76953125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.1344, "grad_norm": 0.008297329768538475, "learning_rate": 2.0555555555555555e-06, "loss": 0.0476, "num_tokens": 25476499.0, "reward": 1.361807107925415, "reward_std": 0.25627726316452026, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7161902189254761, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8976604342460632, "step": 126 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5173343605546995, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.1359468000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02282682331792507, "calib/mean_conf": 0.6373068000000001, "calib/mu_c": 0.6480810606060606, "calib/mu_w": 0.6252542372881356, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.12262680000000008, "calib/std_conf": 0.11726038646431285, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 525.640625, "completions/mean_terminated_length": 533.984130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.13546666666666668, "grad_norm": 0.011081523261964321, "learning_rate": 2.027777777777778e-06, "loss": 0.0004, "num_tokens": 25664303.0, "reward": 1.3940820693969727, "reward_std": 0.3191128671169281, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.7170500755310059, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9125405550003052, "step": 127 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4591626409017713, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.13092000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01260869565217404, "calib/mean_conf": 0.6562, "calib/mu_c": 0.662, "calib/mu_w": 0.649391304347826, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.12356000000000004, "calib/std_conf": 0.10301242643487242, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2674.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 587.64453125, "completions/mean_terminated_length": 599.3505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.13653333333333334, "grad_norm": 0.007573532871901989, "learning_rate": 2.0000000000000003e-06, "loss": -0.0376, "num_tokens": 25870972.0, "reward": 1.4005755186080933, "reward_std": 0.3928867280483246, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.716552734375, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8879466652870178, "step": 128 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.532983165861934, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.06474509803921571, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027578624559572162, "calib/mean_conf": 0.6372941176470588, "calib/mu_c": 0.6477848101265824, "calib/mu_w": 0.6202061855670102, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04121568627450983, "calib/std_conf": 0.11884794934468638, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 498.01171875, "completions/mean_terminated_length": 499.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.1376, "grad_norm": 0.010780357755720615, "learning_rate": 1.9722222222222224e-06, "loss": 0.0086, "num_tokens": 26050415.0, "reward": 1.526939868927002, "reward_std": 0.26041966676712036, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7569589614868164, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9344668984413147, "step": 129 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.535741935483871, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.04375176470588247, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.041687838709677294, "calib/mean_conf": 0.6490066666666668, "calib/mu_c": 0.6653548387096774, "calib/mu_w": 0.6236670000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04245764705882365, "calib/std_conf": 0.1050140472147336, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1662.0, "completions/max_terminated_length": 1662.0, "completions/mean_length": 487.39453125, "completions/mean_terminated_length": 489.305908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.13866666666666666, "grad_norm": 0.007861044257879257, "learning_rate": 1.944444444444445e-06, "loss": -0.028, "num_tokens": 26230044.0, "reward": 1.5061434507369995, "reward_std": 0.20071539282798767, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.759964108467102, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8968331813812256, "step": 130 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5314141217661934, "calib/avg_num_step_conf": 1.03515625, "calib/ece": 0.26455729166666664, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026790723810758754, "calib/mean_conf": 0.6434635416666666, "calib/mu_c": 0.6601030927835052, "calib/mu_w": 0.6333123689727465, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26455729166666664, "calib/std_conf": 0.11376442395099201, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 488.61328125, "completions/mean_terminated_length": 490.5294494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.13973333333333332, "grad_norm": 0.008661036379635334, "learning_rate": 1.916666666666667e-06, "loss": 0.022, "num_tokens": 26410905.0, "reward": 1.2555246353149414, "reward_std": 0.19447124004364014, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6908596158027649, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9294419288635254, "step": 131 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49281045751633984, "calib/avg_num_step_conf": 1.02734375, "calib/ece": 0.07675889328063243, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04155882352941187, "calib/mean_conf": 0.6406324110671936, "calib/mu_c": 0.6570588235294118, "calib/mu_w": 0.6154999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.05632411067193677, "calib/std_conf": 0.11819623955263713, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1451.0, "completions/max_terminated_length": 1451.0, "completions/mean_length": 534.12890625, "completions/mean_terminated_length": 536.2235717773438, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.1408, "grad_norm": 0.010745419189333916, "learning_rate": 1.888888888888889e-06, "loss": 0.008, "num_tokens": 26602802.0, "reward": 1.4952141046524048, "reward_std": 0.36359143257141113, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7531276941299438, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9027261734008789, "step": 132 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5754966460268318, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.20268000000000014, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03622420020639827, "calib/mean_conf": 0.65468, "calib/mu_c": 0.6743859649122806, "calib/mu_w": 0.6381617647058824, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.20068000000000014, "calib/std_conf": 0.10599291297063215, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1770.0, "completions/max_terminated_length": 1770.0, "completions/mean_length": 580.69140625, "completions/mean_terminated_length": 592.2589721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.14186666666666667, "grad_norm": 0.008619986474514008, "learning_rate": 1.8611111111111113e-06, "loss": -0.0055, "num_tokens": 26807371.0, "reward": 1.306413173675537, "reward_std": 0.37664172053337097, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6880574226379395, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.9151625037193298, "step": 133 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5193303301284207, "calib/avg_num_step_conf": 0.9765625, "calib/ece": 0.09009349593495942, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02689995293484837, "calib/mean_conf": 0.6551341463414635, "calib/mu_c": 0.6668345323741007, "calib/mu_w": 0.6399345794392524, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.09009349593495942, "calib/std_conf": 0.08956091112481515, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2625.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 603.98828125, "completions/mean_terminated_length": 611.1502075195312, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.14293333333333333, "grad_norm": 0.007842494174838066, "learning_rate": 1.8333333333333333e-06, "loss": 0.0022, "num_tokens": 27020512.0, "reward": 1.4165414571762085, "reward_std": 0.3975376486778259, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7219664454460144, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8972331285476685, "step": 134 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5355844330729868, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.03385039682539689, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.031358241314301094, "calib/mean_conf": 0.6608345238095239, "calib/mu_c": 0.6725316455696204, "calib/mu_w": 0.6411734042553193, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.03385039682539689, "calib/std_conf": 0.07078657811647564, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1919.0, "completions/max_terminated_length": 1919.0, "completions/mean_length": 526.890625, "completions/mean_terminated_length": 531.0393676757812, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.144, "grad_norm": 0.008880026638507843, "learning_rate": 1.8055555555555557e-06, "loss": 0.0014, "num_tokens": 27210844.0, "reward": 1.5298209190368652, "reward_std": 0.2332524210214615, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7603797912597656, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9375864267349243, "step": 135 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5934523809523811, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.13391304347826086, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.07988095238095239, "calib/mean_conf": 0.5921739130434783, "calib/mu_c": 0.6341666666666667, "calib/mu_w": 0.5542857142857143, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.1258893280632411, "calib/std_conf": 0.16718199531236863, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2463.0, "completions/max_terminated_length": 2463.0, "completions/mean_length": 549.484375, "completions/mean_terminated_length": 549.484375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.14506666666666668, "grad_norm": 0.006931147072464228, "learning_rate": 1.777777777777778e-06, "loss": 0.0455, "num_tokens": 27409568.0, "reward": 1.3625285625457764, "reward_std": 0.28099530935287476, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7359734177589417, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.921917200088501, "step": 136 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5247395833333334, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.05992063492063495, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011698717948717907, "calib/mean_conf": 0.6551587301587303, "calib/mu_c": 0.6596153846153846, "calib/mu_w": 0.6479166666666667, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.048015873015873047, "calib/std_conf": 0.08016089105998236, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1667.0, "completions/max_terminated_length": 1667.0, "completions/mean_length": 513.90625, "completions/mean_terminated_length": 522.0635375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.14613333333333334, "grad_norm": 0.00839358102530241, "learning_rate": 1.75e-06, "loss": -0.059, "num_tokens": 27597680.0, "reward": 1.5042455196380615, "reward_std": 0.3065860867500305, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.746573805809021, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9097723960876465, "step": 137 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5489845938375351, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.036059288537549423, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03486099439775914, "calib/mean_conf": 0.6279723320158103, "calib/mu_c": 0.6396845238095238, "calib/mu_w": 0.6048235294117646, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.12983462887471897, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1324.0, "completions/max_terminated_length": 1324.0, "completions/mean_length": 526.61328125, "completions/mean_terminated_length": 532.8577270507812, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.1472, "grad_norm": 0.007144542410969734, "learning_rate": 1.7222222222222224e-06, "loss": -0.0157, "num_tokens": 27786397.0, "reward": 1.565363883972168, "reward_std": 0.2977532148361206, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7652297019958496, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9200586080551147, "step": 138 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5472686199342827, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.014921259842519755, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03043948521358164, "calib/mean_conf": 0.6531889763779528, "calib/mu_c": 0.6637349397590362, "calib/mu_w": 0.6332954545454546, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.007283464566929203, "calib/std_conf": 0.08604880800333448, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2195.0, "completions/max_terminated_length": 2195.0, "completions/mean_length": 491.1875, "completions/mean_terminated_length": 493.11376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.14826666666666666, "grad_norm": 0.010381989181041718, "learning_rate": 1.6944444444444446e-06, "loss": 0.0109, "num_tokens": 27964805.0, "reward": 1.560642957687378, "reward_std": 0.23459778726100922, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7682285308837891, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.915489673614502, "step": 139 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5138813282525857, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.030941176470588257, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014763881328252682, "calib/mean_conf": 0.6525098039215687, "calib/mu_c": 0.6576047904191618, "calib/mu_w": 0.6428409090909091, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.01427450980392159, "calib/std_conf": 0.10476104884384996, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1629.0, "completions/max_terminated_length": 1629.0, "completions/mean_length": 495.05078125, "completions/mean_terminated_length": 496.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.14933333333333335, "grad_norm": 0.010000730864703655, "learning_rate": 1.6666666666666667e-06, "loss": -0.0318, "num_tokens": 28146122.0, "reward": 1.5590664148330688, "reward_std": 0.27282488346099854, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7630187273025513, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9102286100387573, "step": 140 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5237432112519149, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.04509881422924892, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02759086478206385, "calib/mean_conf": 0.6377470355731226, "calib/mu_c": 0.6471257485029941, "calib/mu_w": 0.6195348837209302, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.011383399209486167, "calib/std_conf": 0.12127478044768873, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2179.0, "completions/max_terminated_length": 2179.0, "completions/mean_length": 549.09765625, "completions/mean_terminated_length": 551.2510375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1504, "grad_norm": 0.007173791527748108, "learning_rate": 1.638888888888889e-06, "loss": 0.0293, "num_tokens": 28343355.0, "reward": 1.5514299869537354, "reward_std": 0.2732362151145935, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7581105828285217, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9035614132881165, "step": 141 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5766574159728647, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.09984375000000009, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0523860622880048, "calib/mean_conf": 0.619375, "calib/mu_c": 0.6429078014184397, "calib/mu_w": 0.5905217391304349, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08421875000000009, "calib/std_conf": 0.1368150553667249, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1569.0, "completions/max_terminated_length": 1569.0, "completions/mean_length": 568.1015625, "completions/mean_terminated_length": 570.3294677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.15146666666666667, "grad_norm": 0.011100290343165398, "learning_rate": 1.6111111111111113e-06, "loss": -0.0116, "num_tokens": 28543517.0, "reward": 1.4611070156097412, "reward_std": 0.2612740695476532, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7529253959655762, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.940139889717102, "step": 142 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5708579484425348, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.04076000000000009, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0664419978517724, "calib/mean_conf": 0.6035600000000001, "calib/mu_c": 0.6296052631578948, "calib/mu_w": 0.5631632653061224, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.01816000000000009, "calib/std_conf": 0.15534904698774307, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2268.0, "completions/max_terminated_length": 2268.0, "completions/mean_length": 548.51953125, "completions/mean_terminated_length": 561.6840209960938, "completions/min_length": 0.0, "completions/min_terminated_length": 248.0, "epoch": 0.15253333333333333, "grad_norm": 0.010165936313569546, "learning_rate": 1.5833333333333333e-06, "loss": -0.0858, "num_tokens": 28740842.0, "reward": 1.4874682426452637, "reward_std": 0.2669368386268616, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7490015029907227, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9096827507019043, "step": 143 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5699814471243043, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.07992063492063484, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05187532467532463, "calib/mean_conf": 0.6184920634920635, "calib/mu_c": 0.6343428571428572, "calib/mu_w": 0.5824675324675326, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.001984126984126984, "calib/std_conf": 0.1319034486974533, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1586.0, "completions/max_terminated_length": 1586.0, "completions/mean_length": 511.84765625, "completions/mean_terminated_length": 515.8779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.1536, "grad_norm": 0.011674090288579464, "learning_rate": 1.5555555555555558e-06, "loss": -0.0547, "num_tokens": 28925571.0, "reward": 1.585937261581421, "reward_std": 0.277024507522583, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.7722121477127075, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8883872032165527, "step": 144 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5292712066905616, "calib/avg_num_step_conf": 1.02734375, "calib/ece": 0.04329411764705898, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027363600159299062, "calib/mean_conf": 0.6145882352941178, "calib/mu_c": 0.624567901234568, "calib/mu_w": 0.5972043010752689, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011294117647058982, "calib/std_conf": 0.13318322576922959, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2523.0, "completions/max_terminated_length": 2523.0, "completions/mean_length": 514.52734375, "completions/mean_terminated_length": 516.5451049804688, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.15466666666666667, "grad_norm": 0.0095293540507555, "learning_rate": 1.527777777777778e-06, "loss": 0.037, "num_tokens": 29109562.0, "reward": 1.5433859825134277, "reward_std": 0.329196572303772, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7598382830619812, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9273048639297485, "step": 145 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.578062996031746, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.1430446194225722, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0636024305555557, "calib/mean_conf": 0.6351706036745408, "calib/mu_c": 0.6672222222222224, "calib/mu_w": 0.6036197916666667, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14107611548556434, "calib/std_conf": 0.11527646304810632, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1977.0, "completions/max_terminated_length": 1977.0, "completions/mean_length": 532.34375, "completions/mean_terminated_length": 534.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.15573333333333333, "grad_norm": 0.009710113517940044, "learning_rate": 1.5e-06, "loss": 0.0232, "num_tokens": 29302626.0, "reward": 1.3976036310195923, "reward_std": 0.2787143588066101, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7433222532272339, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9443954229354858, "step": 146 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5880142892338014, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.1289411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08953621581670357, "calib/mean_conf": 0.6112941176470588, "calib/mu_c": 0.6576422764227642, "calib/mu_w": 0.5681060606060606, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1289411764705883, "calib/std_conf": 0.1503626067039598, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 543.97265625, "completions/mean_terminated_length": 546.1058959960938, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.1568, "grad_norm": 0.010358953848481178, "learning_rate": 1.4722222222222225e-06, "loss": 0.0001, "num_tokens": 29495131.0, "reward": 1.3876032829284668, "reward_std": 0.18397116661071777, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7528367042541504, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9275525808334351, "step": 147 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.511323207443897, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.013333333333333405, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010843596059113114, "calib/mean_conf": 0.655764705882353, "calib/mu_c": 0.6594642857142856, "calib/mu_w": 0.6486206896551725, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0051372549019608575, "calib/std_conf": 0.07447662832117177, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2731.0, "completions/max_terminated_length": 2731.0, "completions/mean_length": 506.578125, "completions/mean_terminated_length": 506.578125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.15786666666666666, "grad_norm": 0.00780367199331522, "learning_rate": 1.4444444444444445e-06, "loss": 0.014, "num_tokens": 29679495.0, "reward": 1.567508578300476, "reward_std": 0.2392946481704712, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7715179920196533, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9066858291625977, "step": 148 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5755208333333334, "calib/avg_num_step_conf": 1.02734375, "calib/ece": 0.018366015624999975, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05477905844155839, "calib/mean_conf": 0.6378839843749999, "calib/mu_c": 0.6567142857142857, "calib/mu_w": 0.6019352272727273, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.11053841322448618, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1669.0, "completions/max_terminated_length": 1669.0, "completions/mean_length": 571.94140625, "completions/mean_terminated_length": 574.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.15893333333333334, "grad_norm": 0.007841997779905796, "learning_rate": 1.4166666666666667e-06, "loss": 0.058, "num_tokens": 29879936.0, "reward": 1.5820075273513794, "reward_std": 0.2582079768180847, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.7865728139877319, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.929884672164917, "step": 149 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5208812260536398, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.15889328063241115, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0028933588761175244, "calib/mean_conf": 0.6145454545454546, "calib/mu_c": 0.6133103448275863, "calib/mu_w": 0.6162037037037038, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.10015810276679851, "calib/std_conf": 0.14518453198995135, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2568.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 476.0390625, "completions/mean_terminated_length": 479.78741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.16, "grad_norm": 0.009207825176417828, "learning_rate": 1.3888888888888892e-06, "loss": -0.0237, "num_tokens": 30056330.0, "reward": 1.4508132934570312, "reward_std": 0.2670350670814514, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7225687503814697, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9065535068511963, "step": 150 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.555614242217113, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.1509566929133859, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.042970049089666285, "calib/mean_conf": 0.6273346456692914, "calib/mu_c": 0.6498347107438017, "calib/mu_w": 0.6068646616541354, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1509566929133859, "calib/std_conf": 0.1171076050557022, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2029.0, "completions/max_terminated_length": 2029.0, "completions/mean_length": 565.8125, "completions/mean_terminated_length": 570.2677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.16106666666666666, "grad_norm": 0.008064262568950653, "learning_rate": 1.3611111111111112e-06, "loss": 0.0237, "num_tokens": 30257770.0, "reward": 1.3659863471984863, "reward_std": 0.2792539596557617, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7297468781471252, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9232016205787659, "step": 151 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5315154922538731, "calib/avg_num_step_conf": 1.05078125, "calib/ece": 0.1092913385826772, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021996501749125374, "calib/mean_conf": 0.6348818897637796, "calib/mu_c": 0.644927536231884, "calib/mu_w": 0.6229310344827587, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.10043307086614177, "calib/std_conf": 0.10816920357085563, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1918.0, "completions/max_terminated_length": 1918.0, "completions/mean_length": 531.640625, "completions/mean_terminated_length": 533.7255249023438, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.16213333333333332, "grad_norm": 0.008094547316432, "learning_rate": 1.3333333333333334e-06, "loss": -0.0085, "num_tokens": 30448830.0, "reward": 1.4350159168243408, "reward_std": 0.3242112696170807, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7347495555877686, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9283766150474548, "step": 152 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5265229736706246, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.08624505928853769, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02604349509550863, "calib/mean_conf": 0.6175494071146246, "calib/mu_c": 0.628255033557047, "calib/mu_w": 0.6022115384615384, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.057430830039525836, "calib/std_conf": 0.1446145296415369, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2109.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 532.80078125, "completions/mean_terminated_length": 534.8901977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.1632, "grad_norm": 0.006869491655379534, "learning_rate": 1.3055555555555556e-06, "loss": -0.0, "num_tokens": 30642115.0, "reward": 1.4726076126098633, "reward_std": 0.2840556502342224, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7330499887466431, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9196433424949646, "step": 153 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.515411376953125, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.1566796875000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023828124999999867, "calib/mean_conf": 0.6541015625, "calib/mu_c": 0.666015625, "calib/mu_w": 0.6421875000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15539062500000012, "calib/std_conf": 0.08811820362478229, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 473.9453125, "completions/mean_terminated_length": 475.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.16426666666666667, "grad_norm": 0.010958393104374409, "learning_rate": 1.2777777777777779e-06, "loss": 0.0028, "num_tokens": 30817453.0, "reward": 1.4038617610931396, "reward_std": 0.25540071725845337, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7304019331932068, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9546434283256531, "step": 154 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4685954953305255, "calib/avg_num_step_conf": 1.03515625, "calib/ece": 0.14996093750000009, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021834218397119076, "calib/mean_conf": 0.6323046875, "calib/mu_c": 0.6433070866141734, "calib/mu_w": 0.6214728682170543, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1430859375000001, "calib/std_conf": 0.13924377558988893, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 483.54296875, "completions/mean_terminated_length": 485.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.16533333333333333, "grad_norm": 0.00791851058602333, "learning_rate": 1.25e-06, "loss": 0.0116, "num_tokens": 30998024.0, "reward": 1.3909826278686523, "reward_std": 0.2507619261741638, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7212804555892944, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9416821002960205, "step": 155 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5218097300050942, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.08592549019607845, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.033762544574630904, "calib/mean_conf": 0.6125215686274509, "calib/mu_c": 0.626291390728477, "calib/mu_w": 0.592528846153846, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.053145098039215716, "calib/std_conf": 0.15812955263690356, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1843.0, "completions/max_terminated_length": 1843.0, "completions/mean_length": 549.921875, "completions/mean_terminated_length": 552.0784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1664, "grad_norm": 0.008392121642827988, "learning_rate": 1.2222222222222223e-06, "loss": -0.0058, "num_tokens": 31193132.0, "reward": 1.4988298416137695, "reward_std": 0.20456862449645996, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7443010210990906, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9410922527313232, "step": 156 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4828834759710335, "calib/avg_num_step_conf": 1.02734375, "calib/ece": 0.04411067193675897, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020063857801185025, "calib/mean_conf": 0.6566798418972333, "calib/mu_c": 0.6644516129032259, "calib/mu_w": 0.6443877551020408, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04407114624505937, "calib/std_conf": 0.08901771967302509, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1223.0, "completions/max_terminated_length": 1223.0, "completions/mean_length": 496.69921875, "completions/mean_terminated_length": 498.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.16746666666666668, "grad_norm": 0.009014573879539967, "learning_rate": 1.1944444444444446e-06, "loss": -0.0128, "num_tokens": 31373583.0, "reward": 1.505033254623413, "reward_std": 0.2585407495498657, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.7499347925186157, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9015136957168579, "step": 157 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.52470703125, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.03218750000000011, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025124999999999953, "calib/mean_conf": 0.651328125, "calib/mu_c": 0.66075, "calib/mu_w": 0.635625, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.029257812500000112, "calib/std_conf": 0.0879574745771181, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1595.0, "completions/max_terminated_length": 1595.0, "completions/mean_length": 509.56640625, "completions/mean_terminated_length": 511.5647277832031, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.16853333333333334, "grad_norm": 0.00884316861629486, "learning_rate": 1.1666666666666668e-06, "loss": 0.0338, "num_tokens": 31558840.0, "reward": 1.542033076286316, "reward_std": 0.3029077649116516, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.768972635269165, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9301871657371521, "step": 158 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5182214299861359, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.05412698412698415, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023553178847296352, "calib/mean_conf": 0.6612698412698413, "calib/mu_c": 0.6705228758169934, "calib/mu_w": 0.6469696969696971, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.05412698412698415, "calib/std_conf": 0.06500285863047514, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1510.0, "completions/max_terminated_length": 1510.0, "completions/mean_length": 471.26171875, "completions/mean_terminated_length": 473.1098327636719, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.1696, "grad_norm": 0.011129575781524181, "learning_rate": 1.138888888888889e-06, "loss": 0.0116, "num_tokens": 31733835.0, "reward": 1.500402808189392, "reward_std": 0.2676931321620941, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7535984516143799, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9225396513938904, "step": 159 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5519440883843009, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.15042968750000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032352438503326475, "calib/mean_conf": 0.6445703125, "calib/mu_c": 0.6606201550387597, "calib/mu_w": 0.6282677165354332, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14554687500000002, "calib/std_conf": 0.10163031840278935, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1552.0, "completions/max_terminated_length": 1552.0, "completions/mean_length": 495.98828125, "completions/mean_terminated_length": 497.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.17066666666666666, "grad_norm": 0.008333866484463215, "learning_rate": 1.111111111111111e-06, "loss": 0.004, "num_tokens": 31915216.0, "reward": 1.4053471088409424, "reward_std": 0.2977757155895233, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7360754013061523, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9336129426956177, "step": 160 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5120133714046939, "calib/avg_num_step_conf": 1.02734375, "calib/ece": 0.03992187500000008, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011908907305523408, "calib/mean_conf": 0.641484375, "calib/mu_c": 0.6410982658959538, "calib/mu_w": 0.6422891566265061, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002812500000000077, "calib/std_conf": 0.10969125594530942, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 476.921875, "completions/mean_terminated_length": 478.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.17173333333333332, "grad_norm": 0.007851874455809593, "learning_rate": 1.0833333333333335e-06, "loss": 0.0153, "num_tokens": 32090796.0, "reward": 1.5846412181854248, "reward_std": 0.2483762502670288, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7650160193443298, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9100950956344604, "step": 161 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5350936329588014, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.0473122529644268, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023658426966292168, "calib/mean_conf": 0.6562450592885377, "calib/mu_c": 0.6632584269662921, "calib/mu_w": 0.6396, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.07882869820921898, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1624.0, "completions/max_terminated_length": 1624.0, "completions/mean_length": 464.62890625, "completions/mean_terminated_length": 466.4510192871094, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1728, "grad_norm": 0.008597790263593197, "learning_rate": 1.0555555555555557e-06, "loss": 0.0053, "num_tokens": 32263453.0, "reward": 1.608614444732666, "reward_std": 0.2592753767967224, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.78007972240448, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9117984771728516, "step": 162 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5342346812205407, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.10692823529411782, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.036115647482014346, "calib/mean_conf": 0.640261568627451, "calib/mu_c": 0.6566906474820144, "calib/mu_w": 0.6205750000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.10104588235294135, "calib/std_conf": 0.1102153941664453, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2483.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 570.4296875, "completions/mean_terminated_length": 572.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.17386666666666667, "grad_norm": 0.008555339649319649, "learning_rate": 1.0277777777777777e-06, "loss": -0.0165, "num_tokens": 32463883.0, "reward": 1.445723056793213, "reward_std": 0.30785322189331055, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7436636686325073, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9330652356147766, "step": 163 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5751426958539798, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.08320312500000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04292667628426272, "calib/mean_conf": 0.6450781250000001, "calib/mu_c": 0.6630201342281881, "calib/mu_w": 0.6200934579439253, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07312500000000001, "calib/std_conf": 0.09925500690889288, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 571.4375, "completions/mean_terminated_length": 573.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.17493333333333333, "grad_norm": 0.008878960274159908, "learning_rate": 1.0000000000000002e-06, "loss": -0.0092, "num_tokens": 32665875.0, "reward": 1.499582290649414, "reward_std": 0.28364017605781555, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7637882828712463, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9426275491714478, "step": 164 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5401851851851852, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.1142352941176471, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03287037037037055, "calib/mean_conf": 0.6382352941176471, "calib/mu_c": 0.6537037037037039, "calib/mu_w": 0.6208333333333333, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11152941176470593, "calib/std_conf": 0.1090836319267252, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1636.0, "completions/max_terminated_length": 1636.0, "completions/mean_length": 532.71484375, "completions/mean_terminated_length": 534.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.176, "grad_norm": 0.008074701763689518, "learning_rate": 9.722222222222224e-07, "loss": 0.0129, "num_tokens": 32857394.0, "reward": 1.4330260753631592, "reward_std": 0.2663751542568207, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7405972480773926, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.946222186088562, "step": 165 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6394679842967376, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.07301960784313707, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.09980370921889803, "calib/mean_conf": 0.6305882352941177, "calib/mu_c": 0.665421686746988, "calib/mu_w": 0.56561797752809, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02631372549019608, "calib/std_conf": 0.11282837504162183, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1700.0, "completions/max_terminated_length": 1700.0, "completions/mean_length": 532.42578125, "completions/mean_terminated_length": 534.5137329101562, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.17706666666666668, "grad_norm": 0.008140970021486282, "learning_rate": 9.444444444444445e-07, "loss": -0.0144, "num_tokens": 33049447.0, "reward": 1.583161473274231, "reward_std": 0.2504641115665436, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.801856279373169, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.939871072769165, "step": 166 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6087347729789591, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.05496093749999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07891196013289037, "calib/mean_conf": 0.6298046875000001, "calib/mu_c": 0.6556976744186046, "calib/mu_w": 0.5767857142857142, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006445312500000001, "calib/std_conf": 0.12323900550567318, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1315.0, "completions/max_terminated_length": 1315.0, "completions/mean_length": 498.234375, "completions/mean_terminated_length": 500.1882629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.17813333333333334, "grad_norm": 0.007336960639804602, "learning_rate": 9.166666666666666e-07, "loss": -0.0083, "num_tokens": 33232171.0, "reward": 1.6025943756103516, "reward_std": 0.23913165926933289, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.7973769903182983, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9281233549118042, "step": 167 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5206031976744185, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.054563492063491995, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02935465116279068, "calib/mean_conf": 0.6517857142857143, "calib/mu_c": 0.6611046511627907, "calib/mu_w": 0.63175, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.011904761904761906, "calib/std_conf": 0.08414123681302946, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2670.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 556.69140625, "completions/mean_terminated_length": 561.0748291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.1792, "grad_norm": 0.009717374108731747, "learning_rate": 8.88888888888889e-07, "loss": -0.0281, "num_tokens": 33428924.0, "reward": 1.580051064491272, "reward_std": 0.311530739068985, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.772222638130188, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9116963744163513, "step": 168 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5652173913043479, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.01580392156862746, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.041521739130434776, "calib/mean_conf": 0.6550196078431374, "calib/mu_c": 0.67, "calib/mu_w": 0.6284782608695653, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01580392156862746, "calib/std_conf": 0.07417453984104336, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1551.0, "completions/max_terminated_length": 1551.0, "completions/mean_length": 525.1015625, "completions/mean_terminated_length": 527.1608276367188, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.18026666666666666, "grad_norm": 0.00645485008135438, "learning_rate": 8.611111111111112e-07, "loss": 0.0181, "num_tokens": 33617102.0, "reward": 1.5582530498504639, "reward_std": 0.18583500385284424, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.779723048210144, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9313783049583435, "step": 169 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5354248448778017, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.07203921568627457, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02870203874889199, "calib/mean_conf": 0.6524313725490197, "calib/mu_c": 0.6643624161073827, "calib/mu_w": 0.6356603773584907, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07007843137254909, "calib/std_conf": 0.08116051751928682, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1713.0, "completions/max_terminated_length": 1713.0, "completions/mean_length": 527.96875, "completions/mean_terminated_length": 530.0392456054688, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.18133333333333335, "grad_norm": 0.0113950464874506, "learning_rate": 8.333333333333333e-07, "loss": -0.011, "num_tokens": 33805982.0, "reward": 1.4930140972137451, "reward_std": 0.2828036844730377, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.753375768661499, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9465547800064087, "step": 170 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5311561561561562, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.12414062500000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013443443443443281, "calib/mean_conf": 0.64203125, "calib/mu_c": 0.6477027027027027, "calib/mu_w": 0.6342592592592594, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09402343750000014, "calib/std_conf": 0.11789732937364401, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1158.0, "completions/max_terminated_length": 1158.0, "completions/mean_length": 486.09765625, "completions/mean_terminated_length": 488.0039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.1824, "grad_norm": 0.010750283487141132, "learning_rate": 8.055555555555557e-07, "loss": -0.0019, "num_tokens": 33986887.0, "reward": 1.4827287197113037, "reward_std": 0.22716417908668518, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.7425246238708496, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9380537271499634, "step": 171 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5929768880208333, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.11585937499999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0741666666666666, "calib/mean_conf": 0.6400000000000001, "calib/mu_c": 0.6585416666666667, "calib/mu_w": 0.5843750000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0029296875, "calib/std_conf": 0.10352762312542485, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1438.0, "completions/max_terminated_length": 1438.0, "completions/mean_length": 492.12890625, "completions/mean_terminated_length": 494.058837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.18346666666666667, "grad_norm": 0.0088827945291996, "learning_rate": 7.777777777777779e-07, "loss": 0.0473, "num_tokens": 34165792.0, "reward": 1.6941030025482178, "reward_std": 0.21069438755512238, "rewards/accuracy_reward_step": 0.75, "rewards/final_brier_reward_step": 0.8174945116043091, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9414230585098267, "step": 172 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5189829083776969, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.06450980392156858, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026133370692070668, "calib/mean_conf": 0.6384705882352942, "calib/mu_c": 0.6469767441860466, "calib/mu_w": 0.6208433734939759, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01423529411764706, "calib/std_conf": 0.10817747816438932, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2060.0, "completions/max_terminated_length": 2060.0, "completions/mean_length": 570.3828125, "completions/mean_terminated_length": 570.3828125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.18453333333333333, "grad_norm": 0.007619481533765793, "learning_rate": 7.5e-07, "loss": 0.0098, "num_tokens": 34364538.0, "reward": 1.5769091844558716, "reward_std": 0.18090735375881195, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.775884747505188, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8730547428131104, "step": 173 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5833974112520826, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.05586274509803936, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0673039215686273, "calib/mean_conf": 0.6109215686274511, "calib/mu_c": 0.6378431372549019, "calib/mu_w": 0.5705392156862746, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03339215686274525, "calib/std_conf": 0.14042803517889893, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2777.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 583.55859375, "completions/mean_terminated_length": 585.8471069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.1856, "grad_norm": 0.011764385737478733, "learning_rate": 7.222222222222222e-07, "loss": 0.0101, "num_tokens": 34567729.0, "reward": 1.5053462982177734, "reward_std": 0.30150508880615234, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7672964334487915, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9055424928665161, "step": 174 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5537757437070938, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.1930952380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03488176964149503, "calib/mean_conf": 0.6256349206349208, "calib/mu_c": 0.6447368421052632, "calib/mu_w": 0.6098550724637681, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.18317460317460318, "calib/std_conf": 0.1256285532183025, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1485.0, "completions/max_terminated_length": 1485.0, "completions/mean_length": 530.48828125, "completions/mean_terminated_length": 538.9087524414062, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.18666666666666668, "grad_norm": 0.008950725197792053, "learning_rate": 6.944444444444446e-07, "loss": -0.015, "num_tokens": 34758926.0, "reward": 1.3267838954925537, "reward_std": 0.2932436466217041, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.71073317527771, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.927856981754303, "step": 175 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6392082149315422, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.03378906249999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.09797339188840093, "calib/mean_conf": 0.5873046875, "calib/mu_c": 0.6248101265822785, "calib/mu_w": 0.5268367346938776, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001953125000000001, "calib/std_conf": 0.1554107490347027, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1722.0, "completions/max_terminated_length": 1722.0, "completions/mean_length": 542.42578125, "completions/mean_terminated_length": 544.552978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.18773333333333334, "grad_norm": 0.009000916965305805, "learning_rate": 6.666666666666667e-07, "loss": 0.0089, "num_tokens": 34951419.0, "reward": 1.5398871898651123, "reward_std": 0.24812811613082886, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7793495655059814, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.941474974155426, "step": 176 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49983406345413506, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.06494117647058828, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001180804460374385, "calib/mean_conf": 0.6324705882352941, "calib/mu_c": 0.6329012345679013, "calib/mu_w": 0.6317204301075269, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.031058823529411805, "calib/std_conf": 0.11191284300323062, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1931.0, "completions/max_terminated_length": 1931.0, "completions/mean_length": 500.0859375, "completions/mean_terminated_length": 502.0470886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.1888, "grad_norm": 0.008004814386367798, "learning_rate": 6.388888888888889e-07, "loss": 0.0152, "num_tokens": 35132841.0, "reward": 1.5370360612869263, "reward_std": 0.1942150890827179, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7533648610115051, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9148522615432739, "step": 177 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5527701778385773, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.07167968749999984, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04614637482900141, "calib/mean_conf": 0.6130859375000001, "calib/mu_c": 0.6285882352941178, "calib/mu_w": 0.5824418604651164, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0103515625, "calib/std_conf": 0.1351628374026903, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1578.0, "completions/max_terminated_length": 1578.0, "completions/mean_length": 516.81640625, "completions/mean_terminated_length": 518.8431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.18986666666666666, "grad_norm": 0.009003140032291412, "learning_rate": 6.111111111111112e-07, "loss": 0.0263, "num_tokens": 35320786.0, "reward": 1.5860583782196045, "reward_std": 0.24094577133655548, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7766379117965698, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9347081184387207, "step": 178 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5568947906026558, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.011259842519684905, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04089479060265577, "calib/mean_conf": 0.6433070866141734, "calib/mu_c": 0.6576363636363637, "calib/mu_w": 0.6167415730337079, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.002480314960629923, "calib/std_conf": 0.09975209522090753, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 546.37890625, "completions/mean_terminated_length": 548.5216064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.19093333333333334, "grad_norm": 0.008436013013124466, "learning_rate": 5.833333333333334e-07, "loss": 0.0183, "num_tokens": 35516491.0, "reward": 1.5619540214538574, "reward_std": 0.28580376505851746, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7749070525169373, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9292519092559814, "step": 179 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.591525974025974, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.03720472440944883, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06620000000000004, "calib/mean_conf": 0.6389370078740159, "calib/mu_c": 0.665, "calib/mu_w": 0.5988, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.03492125984251969, "calib/std_conf": 0.102791988415371, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2703.0, "completions/max_terminated_length": 2703.0, "completions/mean_length": 593.40625, "completions/mean_terminated_length": 595.7333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.192, "grad_norm": 0.007525734603404999, "learning_rate": 5.555555555555555e-07, "loss": 0.0181, "num_tokens": 35721827.0, "reward": 1.5143935680389404, "reward_std": 0.21014289557933807, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7734594345092773, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9184682965278625, "step": 180 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5050963676797628, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.12420703125000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004802322708179485, "calib/mean_conf": 0.66584765625, "calib/mu_c": 0.6656338028169014, "calib/mu_w": 0.6661140350877194, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11768359375000008, "calib/std_conf": 0.059081348136546745, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 493.46875, "completions/mean_terminated_length": 495.4039611816406, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.19306666666666666, "grad_norm": 0.010766234248876572, "learning_rate": 5.277777777777779e-07, "loss": 0.0202, "num_tokens": 35903987.0, "reward": 1.4602396488189697, "reward_std": 0.33089858293533325, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7334253787994385, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.960045576095581, "step": 181 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5239290495314592, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.07320312500000015, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0030040160642570424, "calib/mean_conf": 0.6382812500000001, "calib/mu_c": 0.6393373493975905, "calib/mu_w": 0.6363333333333334, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.031523437500000154, "calib/std_conf": 0.11587715326343455, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1180.0, "completions/max_terminated_length": 1180.0, "completions/mean_length": 544.88671875, "completions/mean_terminated_length": 547.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.19413333333333332, "grad_norm": 0.007987387478351593, "learning_rate": 5.000000000000001e-07, "loss": 0.0008, "num_tokens": 36099206.0, "reward": 1.5611926317214966, "reward_std": 0.2433495819568634, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7598726749420166, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9312751889228821, "step": 182 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5222813995820131, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.03771653543307099, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01627856805770911, "calib/mean_conf": 0.6422047244094489, "calib/mu_c": 0.6480368098159509, "calib/mu_w": 0.6317582417582418, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.019094488188976508, "calib/std_conf": 0.0966397106704452, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2947.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 568.30859375, "completions/mean_terminated_length": 568.30859375, "completions/min_length": 222.0, "completions/min_terminated_length": 222.0, "epoch": 0.1952, "grad_norm": 0.009835210628807545, "learning_rate": 4.7222222222222226e-07, "loss": 0.0282, "num_tokens": 36300941.0, "reward": 1.5496151447296143, "reward_std": 0.25276634097099304, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.7622320652008057, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9364967346191406, "step": 183 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5460555732992654, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.04847656250000017, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.033758543596295154, "calib/mean_conf": 0.6511328125, "calib/mu_c": 0.6644516129032259, "calib/mu_w": 0.6306930693069307, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04707031250000017, "calib/std_conf": 0.08471854791508082, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1210.0, "completions/max_terminated_length": 1210.0, "completions/mean_length": 526.40625, "completions/mean_terminated_length": 528.4706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.19626666666666667, "grad_norm": 0.009076988324522972, "learning_rate": 4.444444444444445e-07, "loss": -0.0007, "num_tokens": 36490549.0, "reward": 1.522108793258667, "reward_std": 0.27043280005455017, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.767989456653595, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9305816888809204, "step": 184 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6082545311268716, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.03281249999999993, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08436826897819805, "calib/mean_conf": 0.624453125, "calib/mu_c": 0.6554320987654322, "calib/mu_w": 0.5710638297872341, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0122265625, "calib/std_conf": 0.12645903853712623, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1319.0, "completions/max_terminated_length": 1319.0, "completions/mean_length": 502.0078125, "completions/mean_terminated_length": 503.97650146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.19733333333333333, "grad_norm": 0.009935007430613041, "learning_rate": 4.1666666666666667e-07, "loss": 0.0068, "num_tokens": 36675551.0, "reward": 1.5608875751495361, "reward_std": 0.28984230756759644, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7907851934432983, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9307299256324768, "step": 185 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5613543091655266, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.03773437499999985, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.047255813953488324, "calib/mean_conf": 0.6481250000000001, "calib/mu_c": 0.664, "calib/mu_w": 0.6167441860465117, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0108984375, "calib/std_conf": 0.08651002470812272, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 527.25390625, "completions/mean_terminated_length": 529.3215942382812, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.1984, "grad_norm": 0.007592697162181139, "learning_rate": 3.8888888888888895e-07, "loss": 0.0263, "num_tokens": 36865136.0, "reward": 1.5913572311401367, "reward_std": 0.2117360234260559, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7867816686630249, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9403030872344971, "step": 186 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5813946177062375, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.07122047244094504, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07018737424547283, "calib/mean_conf": 0.6146850393700787, "calib/mu_c": 0.6456338028169014, "calib/mu_w": 0.5754464285714286, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06342519685039386, "calib/std_conf": 0.1372124739392378, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2159.0, "completions/max_terminated_length": 2159.0, "completions/mean_length": 571.64453125, "completions/mean_terminated_length": 576.1456909179688, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.19946666666666665, "grad_norm": 0.007867695763707161, "learning_rate": 3.611111111111111e-07, "loss": -0.0058, "num_tokens": 37062589.0, "reward": 1.4636740684509277, "reward_std": 0.32446834444999695, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7601839303970337, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9249536991119385, "step": 187 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5852736518131941, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.019960474308300342, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0659346982470227, "calib/mean_conf": 0.6384584980237155, "calib/mu_c": 0.6629559748427675, "calib/mu_w": 0.5970212765957448, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.014980237154150196, "calib/std_conf": 0.11008977318084041, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2540.0, "completions/max_terminated_length": 2540.0, "completions/mean_length": 594.90625, "completions/mean_terminated_length": 599.590576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.20053333333333334, "grad_norm": 0.009555697441101074, "learning_rate": 3.3333333333333335e-07, "loss": 0.0029, "num_tokens": 37268525.0, "reward": 1.5442224740982056, "reward_std": 0.28254127502441406, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.775873064994812, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9548314809799194, "step": 188 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.533833141099577, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.05854901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032549019607843066, "calib/mean_conf": 0.6487450980392159, "calib/mu_c": 0.6617647058823529, "calib/mu_w": 0.6292156862745099, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05364705882352941, "calib/std_conf": 0.08995856195008704, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 512.140625, "completions/mean_terminated_length": 514.1490478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.2016, "grad_norm": 0.011023632250726223, "learning_rate": 3.055555555555556e-07, "loss": -0.0082, "num_tokens": 37456969.0, "reward": 1.5090863704681396, "reward_std": 0.28942179679870605, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7621660232543945, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9260757565498352, "step": 189 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5403963414634146, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.048710937500000134, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0273038176033934, "calib/mean_conf": 0.6223828125, "calib/mu_c": 0.6321951219512195, "calib/mu_w": 0.6048913043478261, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.015234375000000133, "calib/std_conf": 0.1250881952047828, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1537.0, "completions/max_terminated_length": 1537.0, "completions/mean_length": 566.47265625, "completions/mean_terminated_length": 568.6941528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.20266666666666666, "grad_norm": 0.008933677338063717, "learning_rate": 2.7777777777777776e-07, "loss": 0.0068, "num_tokens": 37657162.0, "reward": 1.5577049255371094, "reward_std": 0.24305100739002228, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7642148733139038, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.944577693939209, "step": 190 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.537377450980392, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.12867187500000016, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0370588235294117, "calib/mean_conf": 0.6321875, "calib/mu_c": 0.6495588235294117, "calib/mu_w": 0.6125, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11480468750000017, "calib/std_conf": 0.12020449697806652, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1497.0, "completions/max_terminated_length": 1497.0, "completions/mean_length": 518.5859375, "completions/mean_terminated_length": 520.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.20373333333333332, "grad_norm": 0.008770892396569252, "learning_rate": 2.5000000000000004e-07, "loss": -0.0015, "num_tokens": 37843656.0, "reward": 1.4288978576660156, "reward_std": 0.27863526344299316, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7413152456283569, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9126487374305725, "step": 191 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6281150583244963, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.06605468749999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.10552757158006365, "calib/mean_conf": 0.6104296874999999, "calib/mu_c": 0.6483536585365854, "calib/mu_w": 0.5428260869565218, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0179296875, "calib/std_conf": 0.14564582381123167, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2105.0, "completions/max_terminated_length": 2105.0, "completions/mean_length": 574.82421875, "completions/mean_terminated_length": 577.0784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.2048, "grad_norm": 0.009189280681312084, "learning_rate": 2.2222222222222224e-07, "loss": 0.0108, "num_tokens": 38045355.0, "reward": 1.5728929042816162, "reward_std": 0.2598698139190674, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.796241044998169, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9365895986557007, "step": 192 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5541661430187256, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.06607843137254915, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05032047253990213, "calib/mean_conf": 0.6386274509803922, "calib/mu_c": 0.66013698630137, "calib/mu_w": 0.6098165137614678, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06607843137254915, "calib/std_conf": 0.11078719413225781, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2171.0, "completions/max_terminated_length": 2171.0, "completions/mean_length": 549.984375, "completions/mean_terminated_length": 552.1412353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.20586666666666667, "grad_norm": 0.010155064053833485, "learning_rate": 1.9444444444444447e-07, "loss": 0.0631, "num_tokens": 38241431.0, "reward": 1.4826997518539429, "reward_std": 0.34446632862091064, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7602722644805908, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9336919784545898, "step": 193 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5665559529892692, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.03239215686274509, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05743484925907005, "calib/mean_conf": 0.6262745098039216, "calib/mu_c": 0.6494736842105264, "calib/mu_w": 0.5920388349514564, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03129411764705882, "calib/std_conf": 0.12509072447727654, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 492.75390625, "completions/mean_terminated_length": 494.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.20693333333333333, "grad_norm": 0.01012510061264038, "learning_rate": 1.6666666666666668e-07, "loss": -0.0096, "num_tokens": 38423088.0, "reward": 1.509757399559021, "reward_std": 0.23923927545547485, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7656105160713196, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9421834945678711, "step": 194 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5831740570846075, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.07458498023715415, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05845948012232416, "calib/mean_conf": 0.6318972332015811, "calib/mu_c": 0.6570833333333334, "calib/mu_w": 0.5986238532110092, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.06865612648221345, "calib/std_conf": 0.11559588405872054, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2545.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 501.42578125, "completions/mean_terminated_length": 505.3740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.208, "grad_norm": 0.008480598218739033, "learning_rate": 1.3888888888888888e-07, "loss": -0.002, "num_tokens": 38607005.0, "reward": 1.4583773612976074, "reward_std": 0.20689699053764343, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7502175569534302, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9065118432044983, "step": 195 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5238853503184713, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.0626274509803922, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009576238138567561, "calib/mean_conf": 0.6585490196078433, "calib/mu_c": 0.6622292993630574, "calib/mu_w": 0.6526530612244898, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.052745098039215725, "calib/std_conf": 0.0748793313521476, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1311.0, "completions/max_terminated_length": 1311.0, "completions/mean_length": 422.578125, "completions/mean_terminated_length": 424.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.20906666666666668, "grad_norm": 0.010814669542014599, "learning_rate": 1.1111111111111112e-07, "loss": -0.0152, "num_tokens": 38767297.0, "reward": 1.524308204650879, "reward_std": 0.20253178477287292, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.757500410079956, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9337950944900513, "step": 196 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5414506172839506, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.11517647058823531, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03487037037037044, "calib/mean_conf": 0.6186274509803922, "calib/mu_c": 0.6350370370370372, "calib/mu_w": 0.6001666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.10219607843137257, "calib/std_conf": 0.13773830839452345, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1680.0, "completions/max_terminated_length": 1680.0, "completions/mean_length": 563.4453125, "completions/mean_terminated_length": 565.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.21013333333333334, "grad_norm": 0.009498359635472298, "learning_rate": 8.333333333333334e-08, "loss": -0.0003, "num_tokens": 38966163.0, "reward": 1.4292974472045898, "reward_std": 0.3322080075740814, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.736260175704956, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9446693062782288, "step": 197 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5260466602748483, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.08169291338582693, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.022558644934483896, "calib/mean_conf": 0.6216141732283467, "calib/mu_c": 0.6309395973154364, "calib/mu_w": 0.6083809523809525, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05834645669291356, "calib/std_conf": 0.12935086552973982, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 482.859375, "completions/mean_terminated_length": 484.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.2112, "grad_norm": 0.010535329580307007, "learning_rate": 5.555555555555556e-08, "loss": 0.003, "num_tokens": 39144727.0, "reward": 1.4822851419448853, "reward_std": 0.20785585045814514, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.744623064994812, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9211447238922119, "step": 198 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5248013154288846, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.010434782608695634, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025798163880515368, "calib/mean_conf": 0.6496442687747036, "calib/mu_c": 0.658719512195122, "calib/mu_w": 0.6329213483146067, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.005928853754940711, "calib/std_conf": 0.08768583843255115, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2623.0, "completions/max_terminated_length": 2623.0, "completions/mean_length": 543.15625, "completions/mean_terminated_length": 545.2863159179688, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.21226666666666666, "grad_norm": 0.011450027115643024, "learning_rate": 2.777777777777778e-08, "loss": -0.0003, "num_tokens": 39337543.0, "reward": 1.5476951599121094, "reward_std": 0.2952195107936859, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7634691596031189, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9200922846794128, "step": 199 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6058044519582981, "calib/avg_num_step_conf": 0.98046875, "calib/ece": 0.059288537549407, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07738165680473374, "calib/mean_conf": 0.6241897233201582, "calib/mu_c": 0.6498816568047338, "calib/mu_w": 0.5725, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0077470355731225305, "calib/std_conf": 0.13392757899749022, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 535.6171875, "completions/mean_terminated_length": 539.8346557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.21333333333333335, "grad_norm": 0.007556810975074768, "learning_rate": 0.0, "loss": -0.0125, "num_tokens": 39532277.0, "reward": 1.5712368488311768, "reward_std": 0.19881883263587952, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.7790929675102234, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.909574031829834, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.008669512395572383, "train_runtime": 8904.7917, "train_samples_per_second": 5.75, "train_steps_per_second": 0.022 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 39532277, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }