{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.01953125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 584.16015625, "completions/mean_terminated_length": 667.6116333007812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.0, "learning_rate": 2.5000000000000004e-07, "loss": 0.0, "num_tokens": 232809.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 1 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.00390625, "calib/ece": 0.9, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/mu_c": NaN, "calib/mu_w": 0.9, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.9, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 643.5234375, "completions/mean_terminated_length": 725.7356567382812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 0.0, "num_tokens": 476519.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 2 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.01953125, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.0078125, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 736.22265625, "completions/mean_terminated_length": 812.3836059570312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0032, "grad_norm": 0.0, "learning_rate": 7.5e-07, "loss": 0.0, "num_tokens": 745928.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 3 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 764.01953125, "completions/mean_terminated_length": 854.1004638671875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.004266666666666667, "grad_norm": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.0, "num_tokens": 1023365.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 4 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.046875, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.01171875, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 677.6640625, "completions/mean_terminated_length": 803.1574096679688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.005333333333333333, "grad_norm": 0.0, "learning_rate": 1.25e-06, "loss": 0.0, "num_tokens": 1279215.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 5 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.015625, "calib/ece": 0.595, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": -0.18999999999999995, "calib/mean_conf": 0.895, "calib/mu_c": 0.8, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.495, "calib/std_conf": 0.09499999999999997, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 629.1484375, "completions/mean_terminated_length": 715.8311157226562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.0022278563119471073, "learning_rate": 1.5e-06, "loss": -0.002, "num_tokens": 1521909.0, "reward": 0.008195600472390652, "reward_std": 0.023180657997727394, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0038277343846857548, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.006376933306455612, "step": 6 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.06640625, "calib/ece": 0.075, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.075, "calib/mu_c": NaN, "calib/mu_w": 0.075, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.075, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 728.0703125, "completions/mean_terminated_length": 813.9126586914062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.0008369016577489674, "learning_rate": 1.75e-06, "loss": -0.0008, "num_tokens": 1791399.0, "reward": 0.003267470980063081, "reward_std": 0.009241803549230099, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.003884277306497097, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0037388289347290993, "step": 7 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2809.0, "completions/max_terminated_length": 2809.0, "completions/mean_length": 630.51171875, "completions/mean_terminated_length": 701.7869262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.008533333333333334, "grad_norm": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.0, "num_tokens": 2035002.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 8 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2871.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 705.03125, "completions/mean_terminated_length": 781.3333129882812, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.0096, "grad_norm": 0.0, "learning_rate": 2.25e-06, "loss": 0.0, "num_tokens": 2298706.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 9 }, { "calib/answer_extract_rate": 0.03125, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.9299999999999999, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/mean_conf": 0.93, "calib/mu_c": NaN, "calib/mu_w": 0.93, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.9299999999999999, "calib/std_conf": 0.058878405775518984, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2916.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 643.71875, "completions/mean_terminated_length": 729.1681518554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.0, "learning_rate": 2.5e-06, "loss": 0.0, "num_tokens": 2545978.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 10 }, { "calib/answer_extract_rate": 0.0390625, "calib/auroc": 1.0, "calib/avg_num_step_conf": 0.03125, "calib/ece": 0.5333333333333333, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/gap": 0.17500000000000004, "calib/mean_conf": 0.8333333333333334, "calib/mu_c": 0.95, "calib/mu_w": 0.7749999999999999, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.5166666666666666, "calib/std_conf": 0.10274023338281628, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 648.09375, "completions/mean_terminated_length": 700.0505981445312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.0008397590136155486, "learning_rate": 2.7500000000000004e-06, "loss": 0.0073, "num_tokens": 2792050.0, "reward": 0.0070624202489852905, "reward_std": 0.019975541159510612, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0038964843843132257, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0032692127861082554, "step": 11 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2813.0, "completions/max_terminated_length": 2813.0, "completions/mean_length": 721.28125, "completions/mean_terminated_length": 789.0940551757812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.0, "learning_rate": 3e-06, "loss": 0.0, "num_tokens": 3056554.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 12 }, { "calib/answer_extract_rate": 0.0390625, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.375, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.375, "calib/mu_c": NaN, "calib/mu_w": 0.375, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.375, "calib/std_conf": 0.375, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 746.140625, "completions/mean_terminated_length": 830.4869384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.0011903855483978987, "learning_rate": 3.2500000000000002e-06, "loss": -0.0006, "num_tokens": 3327838.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 13 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.00390625, "calib/ece": 0.95, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.95, "calib/mu_c": NaN, "calib/mu_w": 0.95, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.95, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 729.484375, "completions/mean_terminated_length": 798.0684204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.014933333333333333, "grad_norm": 0.0, "learning_rate": 3.5e-06, "loss": 0.0, "num_tokens": 3595666.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 14 }, { "calib/answer_extract_rate": 0.01953125, "calib/avg_num_step_conf": 0.01953125, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.0078125, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 673.76171875, "completions/mean_terminated_length": 743.461181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.0, "learning_rate": 3.7500000000000005e-06, "loss": 0.0, "num_tokens": 3851709.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 15 }, { "calib/answer_extract_rate": 0.01953125, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.0, "calib/mu_c": NaN, "calib/mu_w": 0.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.01953125, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 705.6015625, "completions/mean_terminated_length": 799.2655029296875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.017066666666666667, "grad_norm": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0, "num_tokens": 4116871.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 16 }, { "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.0390625, "calib/ece": 0.8, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.8, "calib/mu_c": NaN, "calib/mu_w": 0.8, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.8, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 621.75390625, "completions/mean_terminated_length": 686.0733032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.018133333333333335, "grad_norm": 0.0009224429959431291, "learning_rate": 4.25e-06, "loss": 0.0027, "num_tokens": 4355248.0, "reward": 0.0020583579316735268, "reward_std": 0.005821915343403816, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0014062500558793545, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0038584317080676556, "step": 17 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.0078125, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.0078125, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 705.09375, "completions/mean_terminated_length": 795.1717529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0192, "grad_norm": 0.0, "learning_rate": 4.5e-06, "loss": 0.0, "num_tokens": 4622152.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 18 }, { "calib/answer_extract_rate": 0.05078125, "calib/avg_num_step_conf": 0.015625, "calib/ece": 0.6, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.6, "calib/mu_c": NaN, "calib/mu_w": 0.6, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.6, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2738.0, "completions/max_terminated_length": 2738.0, "completions/mean_length": 593.578125, "completions/mean_terminated_length": 652.1716918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.020266666666666665, "grad_norm": 0.0009278705110773444, "learning_rate": 4.75e-06, "loss": -0.0026, "num_tokens": 4854548.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 19 }, { "calib/answer_extract_rate": 0.04296875, "calib/avg_num_step_conf": 0.11328125, "calib/ece": 0.9, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/mu_c": NaN, "calib/mu_w": 0.9, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.9, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 712.390625, "completions/mean_terminated_length": 766.2689208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.0014255295973271132, "learning_rate": 5e-06, "loss": -0.0035, "num_tokens": 5117472.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 20 }, { "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.0546875, "calib/ece": 0.42500000000000004, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": -0.04999999999999993, "calib/mean_conf": 0.925, "calib/mu_c": 0.9, "calib/mu_w": 0.95, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.42500000000000004, "calib/std_conf": 0.024999999999999967, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 627.2890625, "completions/mean_terminated_length": 692.1810302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.0224, "grad_norm": 0.0013866168446838856, "learning_rate": 4.9722222222222224e-06, "loss": 0.0078, "num_tokens": 5356698.0, "reward": 0.008453850634396076, "reward_std": 0.02391109988093376, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0042480467818677425, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.006569306366145611, "step": 21 }, { "calib/answer_extract_rate": 0.03515625, "calib/avg_num_step_conf": 0.0078125, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.0078125, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 725.8203125, "completions/mean_terminated_length": 804.372314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.023466666666666667, "grad_norm": 0.0, "learning_rate": 4.944444444444445e-06, "loss": 0.0, "num_tokens": 5620004.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 22 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.015625, "calib/ece": 0.9450000000000001, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.5, "calib/mean_conf": 0.9450000000000001, "calib/mu_c": NaN, "calib/mu_w": 0.9450000000000001, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.9450000000000001, "calib/std_conf": 0.05499999999999999, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 691.91796875, "completions/mean_terminated_length": 756.9700927734375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.024533333333333334, "grad_norm": 0.001854900037869811, "learning_rate": 4.9166666666666665e-06, "loss": -0.0057, "num_tokens": 5876751.0, "reward": 0.0011566466419026256, "reward_std": 0.003271490801125765, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0030640866607427597, "step": 23 }, { "calib/answer_extract_rate": 0.05078125, "calib/avg_num_step_conf": 0.05859375, "calib/ece": 0.8, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.8, "calib/mu_c": NaN, "calib/mu_w": 0.8, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.8, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 648.19140625, "completions/mean_terminated_length": 718.3419799804688, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.0256, "grad_norm": 0.0009243428939953446, "learning_rate": 4.888888888888889e-06, "loss": -0.0031, "num_tokens": 6122880.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 24 }, { "calib/answer_extract_rate": 0.05078125, "calib/auroc": 1.0, "calib/avg_num_step_conf": 0.0234375, "calib/ece": 0.44999999999999996, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.09999999999999998, "calib/mean_conf": 0.95, "calib/mu_c": 1.0, "calib/mu_w": 0.9, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.44999999999999996, "calib/std_conf": 0.04999999999999999, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 649.58203125, "completions/mean_terminated_length": 707.6297607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.02666666666666667, "grad_norm": 0.0013725002063438296, "learning_rate": 4.861111111111111e-06, "loss": -0.0057, "num_tokens": 6368077.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 25 }, { "calib/answer_extract_rate": 0.05859375, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 1.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": NaN, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 1.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2976.0, "completions/max_terminated_length": 2976.0, "completions/mean_length": 663.62890625, "completions/mean_terminated_length": 696.266357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.027733333333333332, "grad_norm": 0.0011524234432727098, "learning_rate": 4.833333333333333e-06, "loss": -0.0056, "num_tokens": 6618886.0, "reward": 0.0004935335600748658, "reward_std": 0.0013959237840026617, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0004116342170163989, "step": 26 }, { "calib/answer_extract_rate": 0.0546875, "calib/avg_num_step_conf": 0.01953125, "calib/ece": 0.7075, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.25, "calib/mean_conf": 0.7075, "calib/mu_c": NaN, "calib/mu_w": 0.7075, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.7075, "calib/std_conf": 0.17795715776557006, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2986.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 640.40234375, "completions/mean_terminated_length": 674.6625366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0288, "grad_norm": 0.0006247347919270396, "learning_rate": 4.805555555555556e-06, "loss": 0.0037, "num_tokens": 6863725.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 27 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.5952380952380952, "calib/avg_num_step_conf": 0.046875, "calib/ece": 0.512, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.3, "calib/gap": 0.16380952380952385, "calib/mean_conf": 0.752, "calib/mu_c": 0.8666666666666667, "calib/mu_w": 0.7028571428571428, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.482, "calib/std_conf": 0.29188353841900716, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 668.0703125, "completions/mean_terminated_length": 724.6864624023438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.029866666666666666, "grad_norm": 0.0018035010434687138, "learning_rate": 4.777777777777778e-06, "loss": 0.0091, "num_tokens": 7117375.0, "reward": 0.015450541861355305, "reward_std": 0.03422542288899422, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.003554687602445483, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.004692792426794767, "step": 28 }, { "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.875, "calib/avg_num_step_conf": 0.07421875, "calib/ece": 0.67, "calib/final_conf_rate": 0.01953125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": 0.16249999999999998, "calib/mean_conf": 0.8700000000000001, "calib/mu_c": 1.0, "calib/mu_w": 0.8375, "calib/nonempty_final_conf_rate": 0.01953125, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.67, "calib/std_conf": 0.18867962264113206, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 674.35546875, "completions/mean_terminated_length": 710.4320678710938, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.030933333333333334, "grad_norm": 0.001415241975337267, "learning_rate": 4.75e-06, "loss": 0.0066, "num_tokens": 7372818.0, "reward": 0.007501841522753239, "reward_std": 0.018346427008509636, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0029296875, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.00539799127727747, "step": 29 }, { "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.15234375, "calib/ece": 0.8557575757575757, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.2727272727272727, "calib/gap": -0.8159259259259259, "calib/mean_conf": 0.6775757575757576, "calib/mu_c": 0.01, "calib/mu_w": 0.825925925925926, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.6757575757575758, "calib/std_conf": 0.3651043600886454, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3036.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 718.16015625, "completions/mean_terminated_length": 789.051513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.032, "grad_norm": 0.002373687457293272, "learning_rate": 4.722222222222222e-06, "loss": 0.0464, "num_tokens": 7639331.0, "reward": 0.02229999378323555, "reward_std": 0.05606692656874657, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.0053374567069113255, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.009775063954293728, "step": 30 }, { "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.18181818181818182, "calib/avg_num_step_conf": 0.13671875, "calib/ece": 0.8166666666666667, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.01171875, "calib/frac_conf_gt_0.9": 0.5833333333333334, "calib/gap": -0.10909090909090902, "calib/mean_conf": 0.9, "calib/mu_c": 0.8, "calib/mu_w": 0.9090909090909091, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.8166666666666667, "calib/std_conf": 0.0889756521002609, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 673.96875, "completions/mean_terminated_length": 718.9000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.03306666666666667, "grad_norm": 0.001537429285235703, "learning_rate": 4.694444444444445e-06, "loss": 0.02, "num_tokens": 7893459.0, "reward": 0.007447539828717709, "reward_std": 0.021064823493361473, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0020898436196148396, "rewards/format_reward_step": 0.01171875, "rewards/stepwise_brier_reward": 0.005297971423715353, "step": 31 }, { "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.7222222222222222, "calib/avg_num_step_conf": 0.11328125, "calib/ece": 0.5984848484848484, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.45454545454545453, "calib/gap": 0.20740740740740748, "calib/mean_conf": 0.7803030303030302, "calib/mu_c": 0.95, "calib/mu_w": 0.7425925925925925, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.5984848484848484, "calib/std_conf": 0.2091128666555893, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2920.0, "completions/max_terminated_length": 2920.0, "completions/mean_length": 667.45703125, "completions/mean_terminated_length": 739.692626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.034133333333333335, "grad_norm": 0.002994696842506528, "learning_rate": 4.666666666666667e-06, "loss": -0.0007, "num_tokens": 8146712.0, "reward": 0.03033481538295746, "reward_std": 0.07062290608882904, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.016577690839767456, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.014746379107236862, "step": 32 }, { "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.7321428571428571, "calib/avg_num_step_conf": 0.13671875, "calib/ece": 0.559375, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.25, "calib/gap": 0.24642857142857144, "calib/mean_conf": 0.684375, "calib/mu_c": 0.9, "calib/mu_w": 0.6535714285714286, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.559375, "calib/std_conf": 0.29406565487149294, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 602.01953125, "completions/mean_terminated_length": 642.1541748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0352, "grad_norm": 0.0028193776961416006, "learning_rate": 4.638888888888889e-06, "loss": 0.0234, "num_tokens": 8383381.0, "reward": 0.024664167314767838, "reward_std": 0.06388188898563385, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.017470702528953552, "rewards/format_reward_step": 0.03125, "rewards/stepwise_brier_reward": 0.019965259358286858, "step": 33 }, { "calib/answer_extract_rate": 0.19921875, "calib/auroc": 0.42800000000000005, "calib/avg_num_step_conf": 0.37109375, "calib/ece": 0.6201333333333334, "calib/final_conf_rate": 0.1171875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.26666666666666666, "calib/gap": 0.04143999999999992, "calib/mean_conf": 0.7534666666666666, "calib/mu_c": 0.788, "calib/mu_w": 0.7465600000000001, "calib/nonempty_final_conf_rate": 0.1171875, "calib/nonempty_reasoning_rate": 0.21484375, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.6034666666666667, "calib/std_conf": 0.28588164606276417, "calib/step_conf_rate": 0.09765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3019.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 567.203125, "completions/mean_terminated_length": 592.6693725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.03626666666666667, "grad_norm": 0.003705602139234543, "learning_rate": 4.611111111111112e-06, "loss": 0.0529, "num_tokens": 8609377.0, "reward": 0.055107709020376205, "reward_std": 0.11008341610431671, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.027791012078523636, "rewards/format_reward_step": 0.046875, "rewards/stepwise_brier_reward": 0.03672381490468979, "step": 34 }, { "calib/answer_extract_rate": 0.1640625, "calib/auroc": 0.43333333333333335, "calib/avg_num_step_conf": 0.3046875, "calib/ece": 0.6269482758620689, "calib/final_conf_rate": 0.11328125, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.41379310344827586, "calib/gap": 0.0699375000000001, "calib/mean_conf": 0.7821206896551722, "calib/mu_c": 0.8400000000000001, "calib/mu_w": 0.7700625, "calib/nonempty_final_conf_rate": 0.11328125, "calib/nonempty_reasoning_rate": 0.18359375, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.6183275862068965, "calib/std_conf": 0.2577841136049904, "calib/step_conf_rate": 0.09765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2975.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 693.68359375, "completions/mean_terminated_length": 727.7991333007812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.037333333333333336, "grad_norm": 0.004338345490396023, "learning_rate": 4.583333333333333e-06, "loss": 0.0167, "num_tokens": 8871896.0, "reward": 0.05112885683774948, "reward_std": 0.11319087445735931, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.027192480862140656, "rewards/format_reward_step": 0.05859375, "rewards/stepwise_brier_reward": 0.03294296935200691, "step": 35 }, { "calib/answer_extract_rate": 0.3046875, "calib/auroc": 0.6007751937984497, "calib/avg_num_step_conf": 0.4296875, "calib/ece": 0.5844320787878787, "calib/final_conf_rate": 0.21484375, "calib/format_rate": 0.09765625, "calib/frac_conf_gt_0.9": 0.34545454545454546, "calib/gap": 0.0745636201550387, "calib/mean_conf": 0.7517048060606061, "calib/mu_c": 0.81, "calib/mu_w": 0.7354363798449614, "calib/nonempty_final_conf_rate": 0.21484375, "calib/nonempty_reasoning_rate": 0.34375, "calib/nonempty_step_conf_rate": 0.1796875, "calib/pce": 0.5589775333333333, "calib/std_conf": 0.287776394163258, "calib/step_conf_rate": 0.1796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 601.33203125, "completions/mean_terminated_length": 625.7764282226562, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0384, "grad_norm": 0.004933076910674572, "learning_rate": 4.555555555555556e-06, "loss": 0.0768, "num_tokens": 9104229.0, "reward": 0.12337145209312439, "reward_std": 0.2592686712741852, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.052323076874017715, "rewards/format_reward_step": 0.09765625, "rewards/stepwise_brier_reward": 0.06852716207504272, "step": 36 }, { "calib/answer_extract_rate": 0.30078125, "calib/auroc": 0.5104761904761905, "calib/avg_num_step_conf": 0.54296875, "calib/ece": 0.5085999999999999, "calib/final_conf_rate": 0.1953125, "calib/format_rate": 0.109375, "calib/frac_conf_gt_0.9": 0.48, "calib/gap": 0.10104761904761894, "calib/mean_conf": 0.8005999999999999, "calib/mu_c": 0.8713333333333332, "calib/mu_w": 0.7702857142857142, "calib/nonempty_final_conf_rate": 0.1953125, "calib/nonempty_reasoning_rate": 0.359375, "calib/nonempty_step_conf_rate": 0.21484375, "calib/pce": 0.5045999999999999, "calib/std_conf": 0.24287165334801836, "calib/step_conf_rate": 0.21484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2856.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 581.85546875, "completions/mean_terminated_length": 605.5081176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.039466666666666664, "grad_norm": 0.005056615453213453, "learning_rate": 4.527777777777778e-06, "loss": 0.0466, "num_tokens": 9335960.0, "reward": 0.12689536809921265, "reward_std": 0.22808219492435455, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.0597599595785141, "rewards/format_reward_step": 0.109375, "rewards/stepwise_brier_reward": 0.07868652790784836, "step": 37 }, { "calib/answer_extract_rate": 0.39453125, "calib/auroc": 0.5423728813559322, "calib/avg_num_step_conf": 0.68359375, "calib/ece": 0.5359868421052632, "calib/final_conf_rate": 0.296875, "calib/format_rate": 0.16796875, "calib/frac_conf_gt_0.9": 0.23684210526315788, "calib/gap": 0.04203888334995021, "calib/mean_conf": 0.7367763157894737, "calib/mu_c": 0.7694117647058824, "calib/mu_w": 0.7273728813559321, "calib/nonempty_final_conf_rate": 0.296875, "calib/nonempty_reasoning_rate": 0.453125, "calib/nonempty_step_conf_rate": 0.265625, "calib/pce": 0.5245394736842105, "calib/std_conf": 0.26233353612510213, "calib/step_conf_rate": 0.265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 582.05078125, "completions/mean_terminated_length": 613.1892700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.04053333333333333, "grad_norm": 0.005831545684486628, "learning_rate": 4.5e-06, "loss": 0.0665, "num_tokens": 9567533.0, "reward": 0.15777722001075745, "reward_std": 0.30419182777404785, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.08795741945505142, "rewards/format_reward_step": 0.16796875, "rewards/stepwise_brier_reward": 0.10675650835037231, "step": 38 }, { "calib/answer_extract_rate": 0.48828125, "calib/auroc": 0.5432098765432098, "calib/avg_num_step_conf": 0.94140625, "calib/ece": 0.590637037037037, "calib/final_conf_rate": 0.3515625, "calib/format_rate": 0.21875, "calib/frac_conf_gt_0.9": 0.28888888888888886, "calib/gap": 0.006287037037036924, "calib/mean_conf": 0.7699703703703704, "calib/mu_c": 0.7749999999999999, "calib/mu_w": 0.768712962962963, "calib/nonempty_final_conf_rate": 0.3515625, "calib/nonempty_reasoning_rate": 0.546875, "calib/nonempty_step_conf_rate": 0.33984375, "calib/pce": 0.5803037037037037, "calib/std_conf": 0.2472235537764113, "calib/step_conf_rate": 0.33984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 551.421875, "completions/mean_terminated_length": 555.7637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0416, "grad_norm": 0.006903126370161772, "learning_rate": 4.472222222222223e-06, "loss": 0.051, "num_tokens": 9790465.0, "reward": 0.2162027508020401, "reward_std": 0.3284416198730469, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.10541588068008423, "rewards/format_reward_step": 0.21875, "rewards/stepwise_brier_reward": 0.14460425078868866, "step": 39 }, { "calib/answer_extract_rate": 0.52734375, "calib/auroc": 0.4849439775910364, "calib/avg_num_step_conf": 0.91015625, "calib/ece": 0.5725723293386139, "calib/final_conf_rate": 0.39453125, "calib/format_rate": 0.25, "calib/frac_conf_gt_0.9": 0.2871287128712871, "calib/gap": 0.0035876590982260037, "calib/mean_conf": 0.7242475768633662, "calib/mu_c": 0.7272313725490196, "calib/mu_w": 0.7236437134507936, "calib/nonempty_final_conf_rate": 0.39453125, "calib/nonempty_reasoning_rate": 0.5703125, "calib/nonempty_step_conf_rate": 0.3515625, "calib/pce": 0.5642515372594059, "calib/std_conf": 0.2690641315047504, "calib/step_conf_rate": 0.3515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2740.0, "completions/max_terminated_length": 2740.0, "completions/mean_length": 581.7578125, "completions/mean_terminated_length": 593.3466186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 33.0, "epoch": 0.042666666666666665, "grad_norm": 0.005969388876110315, "learning_rate": 4.444444444444444e-06, "loss": 0.0536, "num_tokens": 10021835.0, "reward": 0.1922258585691452, "reward_std": 0.31835484504699707, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.11862140148878098, "rewards/format_reward_step": 0.25, "rewards/stepwise_brier_reward": 0.15041063725948334, "step": 40 }, { "calib/answer_extract_rate": 0.6328125, "calib/auroc": 0.6348139255702281, "calib/avg_num_step_conf": 1.3515625, "calib/ece": 0.4594850746268657, "calib/final_conf_rate": 0.5234375, "calib/format_rate": 0.33984375, "calib/frac_conf_gt_0.9": 0.3582089552238806, "calib/gap": 0.10644921968787524, "calib/mean_conf": 0.810231343283582, "calib/mu_c": 0.8777551020408163, "calib/mu_w": 0.7713058823529411, "calib/nonempty_final_conf_rate": 0.5234375, "calib/nonempty_reasoning_rate": 0.6953125, "calib/nonempty_step_conf_rate": 0.46484375, "calib/pce": 0.4520223880597016, "calib/std_conf": 0.22017079239437895, "calib/step_conf_rate": 0.46484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2761.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 467.984375, "completions/mean_terminated_length": 469.81964111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.04373333333333333, "grad_norm": 0.006464121863245964, "learning_rate": 4.416666666666667e-06, "loss": 0.0262, "num_tokens": 10224567.0, "reward": 0.42656055092811584, "reward_std": 0.527800977230072, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.20245546102523804, "rewards/format_reward_step": 0.33984375, "rewards/stepwise_brier_reward": 0.22789371013641357, "step": 41 }, { "calib/answer_extract_rate": 0.75, "calib/auroc": 0.5936829952168894, "calib/avg_num_step_conf": 1.4609375, "calib/ece": 0.5021393939393939, "calib/final_conf_rate": 0.6875, "calib/format_rate": 0.4140625, "calib/frac_conf_gt_0.9": 0.23295454545454544, "calib/gap": 0.08260315575347699, "calib/mean_conf": 0.7600939393939395, "calib/mu_c": 0.8206382978723401, "calib/mu_w": 0.7380351421188631, "calib/nonempty_final_conf_rate": 0.6875, "calib/nonempty_reasoning_rate": 0.83203125, "calib/nonempty_step_conf_rate": 0.53515625, "calib/pce": 0.4975939393939393, "calib/std_conf": 0.22961455958490537, "calib/step_conf_rate": 0.53515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 426.38671875, "completions/mean_terminated_length": 426.38671875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.0448, "grad_norm": 0.007076622918248177, "learning_rate": 4.388888888888889e-06, "loss": -0.0324, "num_tokens": 10413770.0, "reward": 0.4534400701522827, "reward_std": 0.5130658149719238, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.24723786115646362, "rewards/format_reward_step": 0.4140625, "rewards/stepwise_brier_reward": 0.309909462928772, "step": 42 }, { "calib/answer_extract_rate": 0.75, "calib/auroc": 0.5842462180490351, "calib/avg_num_step_conf": 1.87109375, "calib/ece": 0.5750532544378698, "calib/final_conf_rate": 0.66015625, "calib/format_rate": 0.4296875, "calib/frac_conf_gt_0.9": 0.22485207100591717, "calib/gap": 0.05968622848200311, "calib/mean_conf": 0.7206272189349112, "calib/mu_c": 0.7707777777777777, "calib/mu_w": 0.7110915492957746, "calib/nonempty_final_conf_rate": 0.66015625, "calib/nonempty_reasoning_rate": 0.8125, "calib/nonempty_step_conf_rate": 0.5625, "calib/pce": 0.5679585798816568, "calib/std_conf": 0.24888318015815863, "calib/step_conf_rate": 0.5625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 439.2578125, "completions/mean_terminated_length": 440.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.04586666666666667, "grad_norm": 0.007090613711625338, "learning_rate": 4.361111111111112e-06, "loss": 0.0476, "num_tokens": 10607124.0, "reward": 0.3386271595954895, "reward_std": 0.39735865592956543, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.2085159420967102, "rewards/format_reward_step": 0.4296875, "rewards/stepwise_brier_reward": 0.28122684359550476, "step": 43 }, { "calib/answer_extract_rate": 0.76953125, "calib/auroc": 0.5903628117913833, "calib/avg_num_step_conf": 1.6953125, "calib/ece": 0.5652112994350283, "calib/final_conf_rate": 0.69140625, "calib/format_rate": 0.48828125, "calib/frac_conf_gt_0.9": 0.1864406779661017, "calib/gap": 0.06537142857142841, "calib/mean_conf": 0.7133751412429379, "calib/mu_c": 0.7676666666666666, "calib/mu_w": 0.7022952380952382, "calib/nonempty_final_conf_rate": 0.69140625, "calib/nonempty_reasoning_rate": 0.82421875, "calib/nonempty_step_conf_rate": 0.58203125, "calib/pce": 0.5545474576271188, "calib/std_conf": 0.26426165075547436, "calib/step_conf_rate": 0.58203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2293.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 424.12890625, "completions/mean_terminated_length": 427.468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.046933333333333334, "grad_norm": 0.007667779456824064, "learning_rate": 4.333333333333334e-06, "loss": 0.0207, "num_tokens": 10797701.0, "reward": 0.3810100555419922, "reward_std": 0.44098833203315735, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.2581155300140381, "rewards/format_reward_step": 0.48828125, "rewards/stepwise_brier_reward": 0.2968715727329254, "step": 44 }, { "calib/answer_extract_rate": 0.875, "calib/auroc": 0.5488539523949456, "calib/avg_num_step_conf": 2.5390625, "calib/ece": 0.5292746010832967, "calib/final_conf_rate": 0.80859375, "calib/format_rate": 0.625, "calib/frac_conf_gt_0.9": 0.20772946859903382, "calib/gap": 0.06627188665972095, "calib/mean_conf": 0.7273422339335384, "calib/mu_c": 0.7804878048780488, "calib/mu_w": 0.7142159182183279, "calib/nonempty_final_conf_rate": 0.80859375, "calib/nonempty_reasoning_rate": 0.9140625, "calib/nonempty_step_conf_rate": 0.70703125, "calib/pce": 0.5292746010832967, "calib/std_conf": 0.2359906160423243, "calib/step_conf_rate": 0.70703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1338.0, "completions/max_terminated_length": 1338.0, "completions/mean_length": 369.1953125, "completions/mean_terminated_length": 370.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.048, "grad_norm": 0.007053131703287363, "learning_rate": 4.305555555555556e-06, "loss": 0.0124, "num_tokens": 10972943.0, "reward": 0.5057880878448486, "reward_std": 0.5060071349143982, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.3368590772151947, "rewards/format_reward_step": 0.625, "rewards/stepwise_brier_reward": 0.41193413734436035, "step": 45 }, { "calib/answer_extract_rate": 0.88671875, "calib/auroc": 0.4821515594541911, "calib/avg_num_step_conf": 2.92578125, "calib/ece": 0.5230832354859751, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.6875, "calib/frac_conf_gt_0.9": 0.2054794520547945, "calib/gap": -0.027187009189640787, "calib/mean_conf": 0.7066448793215917, "calib/mu_c": 0.6854166666666667, "calib/mu_w": 0.7126036758563075, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.81640625, "calib/pce": 0.505275016307893, "calib/std_conf": 0.2716084230018331, "calib/step_conf_rate": 0.81640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2120.0, "completions/max_terminated_length": 2120.0, "completions/mean_length": 327.48046875, "completions/mean_terminated_length": 328.7647399902344, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.04906666666666667, "grad_norm": 0.00798877328634262, "learning_rate": 4.277777777777778e-06, "loss": 0.0421, "num_tokens": 11137226.0, "reward": 0.5734426975250244, "reward_std": 0.4912108778953552, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.3600817918777466, "rewards/format_reward_step": 0.6875, "rewards/stepwise_brier_reward": 0.4704821705818176, "step": 46 }, { "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.5108861726508785, "calib/avg_num_step_conf": 2.671875, "calib/ece": 0.52622634643377, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.7734375, "calib/frac_conf_gt_0.9": 0.17903930131004367, "calib/gap": -0.001643111790170737, "calib/mean_conf": 0.6920560407569141, "calib/mu_c": 0.6907142857142856, "calib/mu_w": 0.6923573975044564, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.8515625, "calib/pce": 0.5174381368267831, "calib/std_conf": 0.2640733101582573, "calib/step_conf_rate": 0.8515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1529.0, "completions/max_terminated_length": 1529.0, "completions/mean_length": 281.4453125, "completions/mean_terminated_length": 282.5490417480469, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.050133333333333335, "grad_norm": 0.008706745691597462, "learning_rate": 4.25e-06, "loss": -0.0012, "num_tokens": 11290932.0, "reward": 0.593336820602417, "reward_std": 0.4346812069416046, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.41285210847854614, "rewards/format_reward_step": 0.7734375, "rewards/stepwise_brier_reward": 0.5507680773735046, "step": 47 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4797202797202797, "calib/avg_num_step_conf": 2.421875, "calib/ece": 0.4998348675034866, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.80078125, "calib/frac_conf_gt_0.9": 0.100418410041841, "calib/gap": -0.021629386169386144, "calib/mean_conf": 0.6506019525801954, "calib/mu_c": 0.6329545454545454, "calib/mu_w": 0.6545839316239316, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.8671875, "calib/pce": 0.48316820083682, "calib/std_conf": 0.26267684598556074, "calib/step_conf_rate": 0.8671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 273.65234375, "completions/mean_terminated_length": 273.65234375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.0512, "grad_norm": 0.00950379017740488, "learning_rate": 4.222222222222223e-06, "loss": 0.0538, "num_tokens": 11440355.0, "reward": 0.6238257884979248, "reward_std": 0.43043237924575806, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.4479580819606781, "rewards/format_reward_step": 0.80078125, "rewards/stepwise_brier_reward": 0.5759493708610535, "step": 48 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.472853125675968, "calib/avg_num_step_conf": 2.40625, "calib/ece": 0.44603238866396755, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.05668016194331984, "calib/gap": -0.022916937053860997, "calib/mean_conf": 0.6088663967611335, "calib/mu_c": 0.5902173913043479, "calib/mu_w": 0.6131343283582089, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.43433198380566795, "calib/std_conf": 0.2598775995840334, "calib/step_conf_rate": 0.921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 831.0, "completions/max_terminated_length": 831.0, "completions/mean_length": 223.96875, "completions/mean_terminated_length": 224.84707641601562, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.05226666666666667, "grad_norm": 0.010475818999111652, "learning_rate": 4.194444444444445e-06, "loss": -0.0446, "num_tokens": 11577907.0, "reward": 0.6920686960220337, "reward_std": 0.41203224658966064, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.5262347459793091, "rewards/format_reward_step": 0.88671875, "rewards/stepwise_brier_reward": 0.6423674821853638, "step": 49 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5308615967979776, "calib/avg_num_step_conf": 2.47265625, "calib/ece": 0.44089667761956924, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.07228915662650602, "calib/gap": 0.019779458796943428, "calib/mean_conf": 0.59671997079226, "calib/mu_c": 0.6127659574468084, "calib/mu_w": 0.592986498649865, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.424430814165754, "calib/std_conf": 0.2535598394856577, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 934.0, "completions/max_terminated_length": 934.0, "completions/mean_length": 254.2890625, "completions/mean_terminated_length": 255.28628540039062, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.05333333333333334, "grad_norm": 0.00964977964758873, "learning_rate": 4.166666666666667e-06, "loss": -0.0643, "num_tokens": 11724045.0, "reward": 0.7111487984657288, "reward_std": 0.3974384367465973, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.5488967895507812, "rewards/format_reward_step": 0.890625, "rewards/stepwise_brier_reward": 0.6561766266822815, "step": 50 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5422007941009643, "calib/avg_num_step_conf": 2.56640625, "calib/ece": 0.3676209677419355, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.05241935483870968, "calib/gap": 0.03715825297787856, "calib/mean_conf": 0.5325403225806451, "calib/mu_c": 0.5632558139534883, "calib/mu_w": 0.5260975609756098, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.3633870967741936, "calib/std_conf": 0.26420528733914556, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1129.0, "completions/max_terminated_length": 1129.0, "completions/mean_length": 263.0625, "completions/mean_terminated_length": 264.0941467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.0544, "grad_norm": 0.010027616284787655, "learning_rate": 4.138888888888889e-06, "loss": -0.027, "num_tokens": 11876365.0, "reward": 0.7352821826934814, "reward_std": 0.36998486518859863, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.610413670539856, "rewards/format_reward_step": 0.9140625, "rewards/stepwise_brier_reward": 0.6671764254570007, "step": 51 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5019496632399858, "calib/avg_num_step_conf": 2.34765625, "calib/ece": 0.30011612021857925, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.01639344262295082, "calib/gap": 0.001085017133404087, "calib/mean_conf": 0.46773907103825135, "calib/mu_c": 0.46854838709677415, "calib/mu_w": 0.46746336996337007, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.2568784153005465, "calib/std_conf": 0.2673715065884634, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1073.0, "completions/max_terminated_length": 1073.0, "completions/mean_length": 285.71484375, "completions/mean_terminated_length": 286.8352966308594, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.055466666666666664, "grad_norm": 0.009401632472872734, "learning_rate": 4.111111111111111e-06, "loss": -0.0295, "num_tokens": 12033140.0, "reward": 0.8306794762611389, "reward_std": 0.42442840337753296, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.6383389234542847, "rewards/format_reward_step": 0.921875, "rewards/stepwise_brier_reward": 0.7085399031639099, "step": 52 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5132941848136368, "calib/avg_num_step_conf": 2.484375, "calib/ece": 0.23466786786786786, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": 0.0066150221667715114, "calib/mean_conf": 0.4082921321321321, "calib/mu_c": 0.41313432835820896, "calib/mu_w": 0.40651930619143745, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.18747999999999998, "calib/std_conf": 0.2618325792640843, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 920.0, "completions/max_terminated_length": 920.0, "completions/mean_length": 304.16015625, "completions/mean_terminated_length": 305.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.05653333333333333, "grad_norm": 0.008001954294741154, "learning_rate": 4.083333333333334e-06, "loss": -0.0696, "num_tokens": 12192509.0, "reward": 0.8803744316101074, "reward_std": 0.404864639043808, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.6734973192214966, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.7526279091835022, "step": 53 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4499376385809313, "calib/avg_num_step_conf": 2.88671875, "calib/ece": 0.23645489417989418, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": -0.03331431079083513, "calib/mean_conf": 0.37891521164021164, "calib/mu_c": 0.3572344696969697, "calib/mu_w": 0.39054878048780484, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.13308187830687832, "calib/std_conf": 0.23750741110241325, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1160.0, "completions/max_terminated_length": 1160.0, "completions/mean_length": 316.30859375, "completions/mean_terminated_length": 317.5490417480469, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.0576, "grad_norm": 0.007840687409043312, "learning_rate": 4.055555555555556e-06, "loss": -0.014, "num_tokens": 12355396.0, "reward": 0.9629466533660889, "reward_std": 0.404100239276886, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6632779836654663, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7689805626869202, "step": 54 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5006683375104428, "calib/avg_num_step_conf": 3.12890625, "calib/ece": 0.18966403162055342, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.005073934837092731, "calib/mean_conf": 0.3447628458498023, "calib/mu_c": 0.34095238095238095, "calib/mu_w": 0.3460263157894737, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.14270750988142294, "calib/std_conf": 0.2376830334506864, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1283.0, "completions/max_terminated_length": 1283.0, "completions/mean_length": 327.33203125, "completions/mean_terminated_length": 328.6156921386719, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.058666666666666666, "grad_norm": 0.007699980866163969, "learning_rate": 4.027777777777779e-06, "loss": -0.0497, "num_tokens": 12522697.0, "reward": 0.8905273079872131, "reward_std": 0.3477417826652527, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7088057398796082, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.7819976806640625, "step": 55 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5157967032967032, "calib/avg_num_step_conf": 2.82421875, "calib/ece": 0.1903705179282869, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002797435897435807, "calib/mean_conf": 0.28532669322709164, "calib/mu_c": 0.2875, "calib/mu_w": 0.28470256410256417, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.12629482071713147, "calib/std_conf": 0.21593719770159248, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1160.0, "completions/max_terminated_length": 1160.0, "completions/mean_length": 373.734375, "completions/mean_terminated_length": 375.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 41.0, "epoch": 0.05973333333333333, "grad_norm": 0.006527104414999485, "learning_rate": 4.000000000000001e-06, "loss": -0.0028, "num_tokens": 12700893.0, "reward": 0.881033182144165, "reward_std": 0.3198215663433075, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.7373512387275696, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7931802272796631, "step": 56 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5634366361123235, "calib/avg_num_step_conf": 3.69140625, "calib/ece": 0.15581027667984193, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03008802560744947, "calib/mean_conf": 0.23588932806324114, "calib/mu_c": 0.25658227848101267, "calib/mu_w": 0.2264942528735632, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03972332015810279, "calib/std_conf": 0.18538630643438173, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2483.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 389.6875, "completions/mean_terminated_length": 389.6875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.0608, "grad_norm": 0.006315163802355528, "learning_rate": 3.972222222222223e-06, "loss": 0.0188, "num_tokens": 12883125.0, "reward": 0.9760843515396118, "reward_std": 0.308395653963089, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7303789854049683, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8076419234275818, "step": 57 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.540296052631579, "calib/avg_num_step_conf": 3.41015625, "calib/ece": 0.11228346456692911, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.028472039473684208, "calib/mean_conf": 0.20385826771653542, "calib/mu_c": 0.22515625000000003, "calib/mu_w": 0.19668421052631582, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.03208661417322833, "calib/std_conf": 0.16255851638884533, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1585.0, "completions/max_terminated_length": 1585.0, "completions/mean_length": 416.50390625, "completions/mean_terminated_length": 418.13726806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.06186666666666667, "grad_norm": 0.006129554007202387, "learning_rate": 3.944444444444445e-06, "loss": 0.0073, "num_tokens": 13071750.0, "reward": 0.934508740901947, "reward_std": 0.2841190993785858, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7646179795265198, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8228615522384644, "step": 58 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5461218497675556, "calib/avg_num_step_conf": 2.7578125, "calib/ece": 0.14782000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014022918195905676, "calib/mean_conf": 0.18078000000000002, "calib/mu_c": 0.191044776119403, "calib/mu_w": 0.1770218579234973, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0303, "calib/std_conf": 0.1345402973090219, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1427.0, "completions/max_terminated_length": 1427.0, "completions/mean_length": 393.22265625, "completions/mean_terminated_length": 394.7647399902344, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.06293333333333333, "grad_norm": 0.006685024127364159, "learning_rate": 3.916666666666667e-06, "loss": -0.0082, "num_tokens": 13254343.0, "reward": 0.9424407482147217, "reward_std": 0.25202226638793945, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7529077529907227, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.832697331905365, "step": 59 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48704967349035133, "calib/avg_num_step_conf": 2.875, "calib/ece": 0.19244094488188979, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010248734316530977, "calib/mean_conf": 0.1674015748031496, "calib/mu_c": 0.16025974025974024, "calib/mu_w": 0.17050847457627122, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.028346456692913382, "calib/std_conf": 0.14151159546504766, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1183.0, "completions/max_terminated_length": 1183.0, "completions/mean_length": 381.546875, "completions/mean_terminated_length": 383.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.064, "grad_norm": 0.006986516993492842, "learning_rate": 3.88888888888889e-06, "loss": -0.0189, "num_tokens": 13436555.0, "reward": 0.9669762849807739, "reward_std": 0.2988584637641907, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7100223302841187, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8447355628013611, "step": 60 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5678276189185527, "calib/avg_num_step_conf": 3.328125, "calib/ece": 0.23713147410358568, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027661607263856913, "calib/mean_conf": 0.1513147410358566, "calib/mu_c": 0.16861702127659575, "calib/mu_w": 0.14095541401273884, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.006972111553784861, "calib/std_conf": 0.12346372494108282, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 377.54296875, "completions/mean_terminated_length": 379.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.06506666666666666, "grad_norm": 0.005949671845883131, "learning_rate": 3.861111111111112e-06, "loss": 0.0008, "num_tokens": 13612950.0, "reward": 1.017664909362793, "reward_std": 0.28337812423706055, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.687488317489624, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8253706097602844, "step": 61 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5463499851323224, "calib/avg_num_step_conf": 3.02734375, "calib/ece": 0.18968379446640318, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013424769550996113, "calib/mean_conf": 0.13205533596837946, "calib/mu_c": 0.14144736842105263, "calib/mu_w": 0.12802259887005651, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.010671936758893281, "calib/std_conf": 0.09509993132453148, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2034.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 429.671875, "completions/mean_terminated_length": 431.3569030761719, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.06613333333333334, "grad_norm": 0.005477833095937967, "learning_rate": 3.833333333333334e-06, "loss": 0.0293, "num_tokens": 13805706.0, "reward": 0.9830002784729004, "reward_std": 0.23470573127269745, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7375777363777161, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.878720760345459, "step": 62 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5444078947368423, "calib/avg_num_step_conf": 2.859375, "calib/ece": 0.2141792828685259, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009222953216374272, "calib/mean_conf": 0.11809163346613548, "calib/mu_c": 0.12437500000000001, "calib/mu_w": 0.11515204678362574, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.006772908366533861, "calib/std_conf": 0.07993141712709298, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2531.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 447.6015625, "completions/mean_terminated_length": 447.6015625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.0672, "grad_norm": 0.00584860285744071, "learning_rate": 3.8055555555555556e-06, "loss": 0.0147, "num_tokens": 14004612.0, "reward": 0.9794288873672485, "reward_std": 0.29020506143569946, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7084218263626099, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8664965629577637, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5040711307972903, "calib/avg_num_step_conf": 3.3671875, "calib/ece": 0.2938339920948616, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0027872589890568372, "calib/mean_conf": 0.11565217391304349, "calib/mu_c": 0.11732673267326736, "calib/mu_w": 0.11453947368421052, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005138339920948615, "calib/std_conf": 0.09020874762154729, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2021.0, "completions/max_terminated_length": 2021.0, "completions/mean_length": 430.953125, "completions/mean_terminated_length": 432.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.06826666666666667, "grad_norm": 0.005867753177881241, "learning_rate": 3.777777777777778e-06, "loss": 0.0771, "num_tokens": 14194392.0, "reward": 1.0440330505371094, "reward_std": 0.25283029675483704, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6612000465393066, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8818568587303162, "step": 64 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5328330206378987, "calib/avg_num_step_conf": 3.39453125, "calib/ece": 0.2721960784313725, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008724872688287325, "calib/mean_conf": 0.0987843137254902, "calib/mu_c": 0.1043956043956044, "calib/mu_w": 0.09567073170731707, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007058823529411765, "calib/std_conf": 0.09795403358268236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1859.0, "completions/max_terminated_length": 1859.0, "completions/mean_length": 393.78515625, "completions/mean_terminated_length": 395.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.06933333333333333, "grad_norm": 0.005394121166318655, "learning_rate": 3.7500000000000005e-06, "loss": 0.0359, "num_tokens": 14375905.0, "reward": 1.0301740169525146, "reward_std": 0.20167192816734314, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6878316402435303, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9278453588485718, "step": 65 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5066545250770524, "calib/avg_num_step_conf": 3.8828125, "calib/ece": 0.2540476190476191, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002675819557298967, "calib/mean_conf": 0.09753968253968255, "calib/mu_c": 0.09930232558139535, "calib/mu_w": 0.09662650602409638, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.005158730158730155, "calib/std_conf": 0.07080810764196235, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 508.9296875, "completions/mean_terminated_length": 510.9255065917969, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.0704, "grad_norm": 0.004965396132320166, "learning_rate": 3.7222222222222225e-06, "loss": 0.0431, "num_tokens": 14588223.0, "reward": 1.0066497325897217, "reward_std": 0.22029651701450348, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.693121075630188, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9059818387031555, "step": 66 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.551000272182907, "calib/avg_num_step_conf": 3.73828125, "calib/ece": 0.26639215686274514, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009852340772999471, "calib/mean_conf": 0.08184313725490196, "calib/mu_c": 0.08829545454545455, "calib/mu_w": 0.07844311377245508, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0015686274509803923, "calib/std_conf": 0.06126233290770837, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1602.0, "completions/max_terminated_length": 1602.0, "completions/mean_length": 459.57421875, "completions/mean_terminated_length": 461.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.07146666666666666, "grad_norm": 0.005392501130700111, "learning_rate": 3.694444444444445e-06, "loss": 0.0618, "num_tokens": 14786562.0, "reward": 1.0170819759368896, "reward_std": 0.20529669523239136, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6934664249420166, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9126452207565308, "step": 67 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5467941059338908, "calib/avg_num_step_conf": 4.19140625, "calib/ece": 0.29388235294117654, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017702110712863178, "calib/mean_conf": 0.08650980392156864, "calib/mu_c": 0.08763440860215052, "calib/mu_w": 0.0858641975308642, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00784313725490196, "calib/std_conf": 0.062220157048168946, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 430.3671875, "completions/mean_terminated_length": 432.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.07253333333333334, "grad_norm": 0.005932637490332127, "learning_rate": 3.6666666666666666e-06, "loss": 0.0064, "num_tokens": 14976504.0, "reward": 1.0296599864959717, "reward_std": 0.19540664553642273, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6813062429428101, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9060271978378296, "step": 68 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.488728323699422, "calib/avg_num_step_conf": 4.49609375, "calib/ece": 0.25498023715415014, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008100433526011558, "calib/mean_conf": 0.07466403162055336, "calib/mu_c": 0.069125, "calib/mu_w": 0.07722543352601156, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006719367588932806, "calib/std_conf": 0.06855067779127547, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2401.0, "completions/max_terminated_length": 2401.0, "completions/mean_length": 506.68359375, "completions/mean_terminated_length": 506.68359375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.0736, "grad_norm": 0.005567294545471668, "learning_rate": 3.638888888888889e-06, "loss": 0.0527, "num_tokens": 15186391.0, "reward": 0.986290693283081, "reward_std": 0.20521283149719238, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7012136578559875, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9005477428436279, "step": 69 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5348829201101928, "calib/avg_num_step_conf": 4.90625, "calib/ece": 0.28343043478260865, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005661515151515167, "calib/mean_conf": 0.06755770750988142, "calib/mu_c": 0.07125000000000001, "calib/mu_w": 0.06558848484848484, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0015810276679841897, "calib/std_conf": 0.04427719536654392, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2458.0, "completions/max_terminated_length": 2458.0, "completions/mean_length": 532.078125, "completions/mean_terminated_length": 532.078125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.07466666666666667, "grad_norm": 0.005367174278944731, "learning_rate": 3.6111111111111115e-06, "loss": 0.0722, "num_tokens": 15405275.0, "reward": 1.0083911418914795, "reward_std": 0.21531283855438232, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6785225868225098, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9108941555023193, "step": 70 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5870109546165885, "calib/avg_num_step_conf": 4.94140625, "calib/ece": 0.21571314741035857, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011325899843505474, "calib/mean_conf": 0.07034262948207172, "calib/mu_c": 0.07846478873239436, "calib/mu_w": 0.06713888888888889, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0015936254980079682, "calib/std_conf": 0.044136538145399744, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 521.18359375, "completions/mean_terminated_length": 523.2274780273438, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.07573333333333333, "grad_norm": 0.006266950163990259, "learning_rate": 3.5833333333333335e-06, "loss": 0.0659, "num_tokens": 15618786.0, "reward": 0.9764514565467834, "reward_std": 0.21679821610450745, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.739886999130249, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9088441133499146, "step": 71 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5564166779477461, "calib/avg_num_step_conf": 4.76953125, "calib/ece": 0.28569803921568626, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006989779342087457, "calib/mean_conf": 0.06567450980392156, "calib/mu_c": 0.0702247191011236, "calib/mu_w": 0.06323493975903614, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001176470588235294, "calib/std_conf": 0.03793575499594873, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 488.56640625, "completions/mean_terminated_length": 488.56640625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.0768, "grad_norm": 0.005957463290542364, "learning_rate": 3.555555555555556e-06, "loss": 0.0653, "num_tokens": 15823947.0, "reward": 1.0219218730926514, "reward_std": 0.22082969546318054, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6837721467018127, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9342056512832642, "step": 72 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47057364341085267, "calib/avg_num_step_conf": 5.25390625, "calib/ece": 0.4320236220472441, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005496186046511628, "calib/mean_conf": 0.06403937007874015, "calib/mu_c": 0.061248, "calib/mu_w": 0.06674418604651162, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001968503937007874, "calib/std_conf": 0.03856488582247894, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2683.0, "completions/max_terminated_length": 2683.0, "completions/mean_length": 477.5859375, "completions/mean_terminated_length": 477.5859375, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.07786666666666667, "grad_norm": 0.0061846706084907055, "learning_rate": 3.5277777777777784e-06, "loss": 0.0666, "num_tokens": 16028921.0, "reward": 1.0817893743515015, "reward_std": 0.20533283054828644, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5581741333007812, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8608090877532959, "step": 73 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.521512254241853, "calib/avg_num_step_conf": 5.86328125, "calib/ece": 0.3111904761904762, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0040587126312954455, "calib/mean_conf": 0.0634126984126984, "calib/mu_c": 0.06595744680851062, "calib/mu_w": 0.06189873417721518, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007936507936507937, "calib/std_conf": 0.03853801871359453, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2650.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 517.50390625, "completions/mean_terminated_length": 519.5333862304688, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.07893333333333333, "grad_norm": 0.005901651456952095, "learning_rate": 3.5e-06, "loss": 0.0445, "num_tokens": 16241010.0, "reward": 1.0170509815216064, "reward_std": 0.20792332291603088, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6602047085762024, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8852944374084473, "step": 74 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5081420263238445, "calib/avg_num_step_conf": 4.078125, "calib/ece": 0.46271484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0029008264462809935, "calib/mean_conf": 0.06462890625, "calib/mu_c": 0.066, "calib/mu_w": 0.06309917355371901, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03832120344115919, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1698.0, "completions/max_terminated_length": 1698.0, "completions/mean_length": 453.01171875, "completions/mean_terminated_length": 454.78826904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.08, "grad_norm": 0.006512695923447609, "learning_rate": 3.4722222222222224e-06, "loss": 0.0785, "num_tokens": 16437413.0, "reward": 1.1022157669067383, "reward_std": 0.24831125140190125, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5249893665313721, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8557596206665039, "step": 75 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47437848807711824, "calib/avg_num_step_conf": 5.10546875, "calib/ece": 0.36612992125984245, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004097158802638251, "calib/mean_conf": 0.061429133858267725, "calib/mu_c": 0.05907407407407407, "calib/mu_w": 0.06317123287671232, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0011811023622047244, "calib/std_conf": 0.03622568282448506, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2885.0, "completions/max_terminated_length": 2885.0, "completions/mean_length": 506.078125, "completions/mean_terminated_length": 506.078125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.08106666666666666, "grad_norm": 0.005883322563022375, "learning_rate": 3.444444444444445e-06, "loss": 0.0326, "num_tokens": 16645705.0, "reward": 1.056463599205017, "reward_std": 0.22573524713516235, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.610862135887146, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9072553515434265, "step": 76 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5324880629758678, "calib/avg_num_step_conf": 4.58984375, "calib/ece": 0.4254538152610442, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004864692218350772, "calib/mean_conf": 0.06852208835341365, "calib/mu_c": 0.07098373983739839, "calib/mu_w": 0.06611904761904762, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.03417578750904915, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 488.5703125, "completions/mean_terminated_length": 492.4173278808594, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.08213333333333334, "grad_norm": 0.007012397050857544, "learning_rate": 3.416666666666667e-06, "loss": 0.0819, "num_tokens": 16851123.0, "reward": 1.083645224571228, "reward_std": 0.2435062825679779, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5546954870223999, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8986270427703857, "step": 77 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.45486111111111116, "calib/avg_num_step_conf": 4.734375, "calib/ece": 0.374155859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005849900793650789, "calib/mean_conf": 0.06334414062500002, "calib/mu_c": 0.06005357142857143, "calib/mu_w": 0.06590347222222222, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.034715984431421855, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 514.87109375, "completions/mean_terminated_length": 516.8901977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.0832, "grad_norm": 0.007328997366130352, "learning_rate": 3.3888888888888893e-06, "loss": 0.0514, "num_tokens": 17066634.0, "reward": 1.0606286525726318, "reward_std": 0.24192503094673157, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.594253420829773, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9102579951286316, "step": 78 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4554146156758803, "calib/avg_num_step_conf": 3.83203125, "calib/ece": 0.38522924901185773, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006175880348352883, "calib/mean_conf": 0.06773517786561264, "calib/mu_c": 0.06434210526315791, "calib/mu_w": 0.07051798561151079, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0011857707509881422, "calib/std_conf": 0.03531614736199371, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2591.0, "completions/max_terminated_length": 2591.0, "completions/mean_length": 540.53515625, "completions/mean_terminated_length": 542.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.08426666666666667, "grad_norm": 0.006232034880667925, "learning_rate": 3.3611111111111117e-06, "loss": 0.0227, "num_tokens": 17287067.0, "reward": 1.0605382919311523, "reward_std": 0.2099224030971527, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5937643647193909, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8796247243881226, "step": 79 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5189950980392157, "calib/avg_num_step_conf": 4.34765625, "calib/ece": 0.39099609375000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004933823529411782, "calib/mean_conf": 0.07775390625000002, "calib/mu_c": 0.08037500000000003, "calib/mu_w": 0.07544117647058825, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03450166980185468, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1215.0, "completions/max_terminated_length": 1215.0, "completions/mean_length": 467.828125, "completions/mean_terminated_length": 469.66278076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.08533333333333333, "grad_norm": 0.007364832330495119, "learning_rate": 3.3333333333333333e-06, "loss": 0.037, "num_tokens": 17484671.0, "reward": 1.0838050842285156, "reward_std": 0.20884555578231812, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5993655323982239, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8614894151687622, "step": 80 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.46869521662910085, "calib/avg_num_step_conf": 4.25390625, "calib/ece": 0.3979802371541502, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0033891184573002497, "calib/mean_conf": 0.0802806324110672, "calib/mu_c": 0.0785123966942149, "calib/mu_w": 0.08190151515151516, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.02879126700453696, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 539.29296875, "completions/mean_terminated_length": 539.29296875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.0864, "grad_norm": 0.006128122564405203, "learning_rate": 3.3055555555555558e-06, "loss": 0.0711, "num_tokens": 17704658.0, "reward": 1.0754420757293701, "reward_std": 0.24022594094276428, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5745105743408203, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.871496856212616, "step": 81 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5617386489479512, "calib/avg_num_step_conf": 4.0859375, "calib/ece": 0.4251764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005105204872646693, "calib/mean_conf": 0.08305882352941177, "calib/mu_c": 0.0855813953488372, "calib/mu_w": 0.0804761904761905, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001176470588235294, "calib/std_conf": 0.029480077529832522, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1368.0, "completions/max_terminated_length": 1368.0, "completions/mean_length": 452.6015625, "completions/mean_terminated_length": 454.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.08746666666666666, "grad_norm": 0.007257652468979359, "learning_rate": 3.277777777777778e-06, "loss": 0.0319, "num_tokens": 17901756.0, "reward": 1.0876719951629639, "reward_std": 0.2194688618183136, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5622234344482422, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8168658018112183, "step": 82 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5077663870767319, "calib/avg_num_step_conf": 4.6015625, "calib/ece": 0.35140625000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005914880397639016, "calib/mean_conf": 0.08218750000000002, "calib/mu_c": 0.08252252252252251, "calib/mu_w": 0.08193103448275861, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.028171836002468854, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1741.0, "completions/max_terminated_length": 1741.0, "completions/mean_length": 545.0234375, "completions/mean_terminated_length": 547.1608276367188, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.08853333333333334, "grad_norm": 0.006978335324674845, "learning_rate": 3.2500000000000002e-06, "loss": 0.0352, "num_tokens": 18124226.0, "reward": 1.075504183769226, "reward_std": 0.1979038417339325, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6304203271865845, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9068013429641724, "step": 83 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5181970363940728, "calib/avg_num_step_conf": 3.91015625, "calib/ece": 0.4087795275590552, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003385826771653541, "calib/mean_conf": 0.09122047244094487, "calib/mu_c": 0.09291338582677164, "calib/mu_w": 0.0895275590551181, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.022105834689869885, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1348.0, "completions/max_terminated_length": 1348.0, "completions/mean_length": 462.6171875, "completions/mean_terminated_length": 464.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.0896, "grad_norm": 0.009016655385494232, "learning_rate": 3.2222222222222227e-06, "loss": 0.0442, "num_tokens": 18324256.0, "reward": 1.1034959554672241, "reward_std": 0.2110147476196289, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5795402526855469, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8580283522605896, "step": 84 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5173715029277814, "calib/avg_num_step_conf": 4.75390625, "calib/ece": 0.33095617529880483, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000230318802862714, "calib/mean_conf": 0.09533864541832669, "calib/mu_c": 0.09547169811320753, "calib/mu_w": 0.09524137931034482, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.00199203187250996, "calib/std_conf": 0.023092246602381246, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2770.0, "completions/max_terminated_length": 2770.0, "completions/mean_length": 525.9453125, "completions/mean_terminated_length": 532.1818237304688, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.09066666666666667, "grad_norm": 0.008049411699175835, "learning_rate": 3.1944444444444443e-06, "loss": 0.0607, "num_tokens": 18542402.0, "reward": 1.0478579998016357, "reward_std": 0.23492947220802307, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6321375370025635, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8802822828292847, "step": 85 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5167689161554192, "calib/avg_num_step_conf": 4.1796875, "calib/ece": 0.2594071146245059, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008766189502385779, "calib/mean_conf": 0.09632411067193675, "calib/mu_c": 0.09688888888888887, "calib/mu_w": 0.0960122699386503, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.0140975459593047, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 546.21484375, "completions/mean_terminated_length": 548.3568725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.09173333333333333, "grad_norm": 0.0073614963330328465, "learning_rate": 3.1666666666666667e-06, "loss": 0.0669, "num_tokens": 18763425.0, "reward": 1.02299165725708, "reward_std": 0.22230364382266998, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6954777836799622, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8994483947753906, "step": 86 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5043693009118542, "calib/avg_num_step_conf": 4.55078125, "calib/ece": 0.46102766798418976, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011835106382978577, "calib/mean_conf": 0.09628458498023715, "calib/mu_c": 0.09680851063829786, "calib/mu_w": 0.095625, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.014351255592481205, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2204.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 476.59765625, "completions/mean_terminated_length": 478.4667053222656, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.0928, "grad_norm": 0.010919037275016308, "learning_rate": 3.138888888888889e-06, "loss": 0.0504, "num_tokens": 18966610.0, "reward": 1.1162362098693848, "reward_std": 0.21834619343280792, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5347750186920166, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7969573736190796, "step": 87 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.512905465101603, "calib/avg_num_step_conf": 5.38671875, "calib/ece": 0.35466403162055327, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001821910892338796, "calib/mean_conf": 0.0959288537549407, "calib/mu_c": 0.09692982456140352, "calib/mu_w": 0.09510791366906472, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014677928277792457, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 567.5546875, "completions/mean_terminated_length": 572.0236206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.09386666666666667, "grad_norm": 0.007051222026348114, "learning_rate": 3.1111111111111116e-06, "loss": 0.0465, "num_tokens": 19197432.0, "reward": 1.058126449584961, "reward_std": 0.26280489563941956, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6045206785202026, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8531519174575806, "step": 88 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.503820409194241, "calib/avg_num_step_conf": 4.55078125, "calib/ece": 0.32231372549019605, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000636524374842129, "calib/mean_conf": 0.09729411764705881, "calib/mu_c": 0.09766355140186915, "calib/mu_w": 0.09702702702702702, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01192014955434385, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2276.0, "completions/max_terminated_length": 2276.0, "completions/mean_length": 562.671875, "completions/mean_terminated_length": 562.671875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.09493333333333333, "grad_norm": 0.0061728148721158504, "learning_rate": 3.0833333333333336e-06, "loss": 0.0435, "num_tokens": 19426044.0, "reward": 1.0548436641693115, "reward_std": 0.14040419459342957, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6501949429512024, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8486722707748413, "step": 89 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4921259842519685, "calib/avg_num_step_conf": 4.46484375, "calib/ece": 0.40314960629921265, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007874015748031565, "calib/mean_conf": 0.0968503937007874, "calib/mu_c": 0.09645669291338582, "calib/mu_w": 0.09724409448818898, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.012147439858694105, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2791.0, "completions/max_terminated_length": 2791.0, "completions/mean_length": 550.640625, "completions/mean_terminated_length": 550.640625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.096, "grad_norm": 0.007021500263363123, "learning_rate": 3.055555555555556e-06, "loss": 0.0617, "num_tokens": 19646008.0, "reward": 1.105597734451294, "reward_std": 0.22193342447280884, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5823438167572021, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8764536380767822, "step": 90 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5429648554336989, "calib/avg_num_step_conf": 4.19140625, "calib/ece": 0.437992125984252, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004717098703888339, "calib/mean_conf": 0.09744094488188976, "calib/mu_c": 0.09963235294117646, "calib/mu_w": 0.09491525423728812, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01187809042641385, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1506.0, "completions/max_terminated_length": 1506.0, "completions/mean_length": 530.578125, "completions/mean_terminated_length": 532.6588745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.09706666666666666, "grad_norm": 0.007281932048499584, "learning_rate": 3.0277777777777776e-06, "loss": 0.0262, "num_tokens": 19865228.0, "reward": 1.1158626079559326, "reward_std": 0.20746996998786926, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5572363138198853, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8271024227142334, "step": 91 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5242480620155038, "calib/avg_num_step_conf": 3.9140625, "calib/ece": 0.4098425196850393, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002424806201550389, "calib/mean_conf": 0.09803149606299212, "calib/mu_c": 0.09922480620155039, "calib/mu_w": 0.0968, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.009723692153723203, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1636.0, "completions/max_terminated_length": 1636.0, "completions/mean_length": 503.4375, "completions/mean_terminated_length": 505.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.09813333333333334, "grad_norm": 0.006811084225773811, "learning_rate": 3e-06, "loss": 0.0072, "num_tokens": 20076508.0, "reward": 1.1008079051971436, "reward_std": 0.21823862195014954, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5709179639816284, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8520207405090332, "step": 92 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5140108870468886, "calib/avg_num_step_conf": 3.87890625, "calib/ece": 0.363921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014010887046888576, "calib/mean_conf": 0.09882352941176469, "calib/mu_c": 0.09957627118644068, "calib/mu_w": 0.09817518248175182, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007578881603955957, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1927.0, "completions/max_terminated_length": 1927.0, "completions/mean_length": 514.77734375, "completions/mean_terminated_length": 516.7960815429688, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.0992, "grad_norm": 0.008910668082535267, "learning_rate": 2.9722222222222225e-06, "loss": -0.0041, "num_tokens": 20289747.0, "reward": 1.0883748531341553, "reward_std": 0.23094485700130463, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6171679496765137, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8769761323928833, "step": 93 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5107987339415379, "calib/avg_num_step_conf": 3.8515625, "calib/ece": 0.3862204724409449, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014522435300689113, "calib/mean_conf": 0.09803149606299212, "calib/mu_c": 0.09878048780487805, "calib/mu_w": 0.09732824427480914, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.010688088157872621, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 520.30859375, "completions/mean_terminated_length": 520.30859375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.10026666666666667, "grad_norm": 0.010884858667850494, "learning_rate": 2.944444444444445e-06, "loss": 0.0354, "num_tokens": 20507306.0, "reward": 1.0963938236236572, "reward_std": 0.18074506521224976, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.59312504529953, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8821374177932739, "step": 94 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49912761714855436, "calib/avg_num_step_conf": 3.84375, "calib/ece": 0.4375984251968503, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -8.723828514453635e-05, "calib/mean_conf": 0.09783464566929133, "calib/mu_c": 0.09779411764705882, "calib/mu_w": 0.09788135593220336, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010177374767488633, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1643.0, "completions/max_terminated_length": 1643.0, "completions/mean_length": 525.859375, "completions/mean_terminated_length": 527.9215698242188, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.10133333333333333, "grad_norm": 0.007196079473942518, "learning_rate": 2.916666666666667e-06, "loss": -0.0126, "num_tokens": 20723734.0, "reward": 1.1179805994033813, "reward_std": 0.21131283044815063, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5552442073822021, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8395587205886841, "step": 95 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.529807948786343, "calib/avg_num_step_conf": 4.03125, "calib/ece": 0.5427450980392157, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002980794878634313, "calib/mean_conf": 0.09647058823529411, "calib/mu_c": 0.09754601226993863, "calib/mu_w": 0.09456521739130432, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01280678885710426, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 500.0078125, "completions/mean_terminated_length": 501.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.1024, "grad_norm": 0.010168724693357944, "learning_rate": 2.888888888888889e-06, "loss": 0.0303, "num_tokens": 20933232.0, "reward": 1.156736969947815, "reward_std": 0.18414181470870972, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.47416016459465027, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7333148717880249, "step": 96 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5306818181818183, "calib/avg_num_step_conf": 4.2578125, "calib/ece": 0.381547619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003068181818181845, "calib/mean_conf": 0.09464285714285715, "calib/mu_c": 0.09625, "calib/mu_w": 0.09318181818181816, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.01546473935329355, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2074.0, "completions/max_terminated_length": 2074.0, "completions/mean_length": 520.75390625, "completions/mean_terminated_length": 524.8543090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.10346666666666667, "grad_norm": 0.007721587549895048, "learning_rate": 2.861111111111111e-06, "loss": -0.0093, "num_tokens": 21147297.0, "reward": 1.1001715660095215, "reward_std": 0.2343776524066925, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5960644483566284, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9101195931434631, "step": 97 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48524681430162064, "calib/avg_num_step_conf": 3.921875, "calib/ece": 0.3697647058823529, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017617221328714433, "calib/mean_conf": 0.09298039215686275, "calib/mu_c": 0.09203389830508475, "calib/mu_w": 0.0937956204379562, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.017771413701342168, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 588.20703125, "completions/mean_terminated_length": 588.20703125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.10453333333333334, "grad_norm": 0.006101598031818867, "learning_rate": 2.8333333333333335e-06, "loss": 0.047, "num_tokens": 21379742.0, "reward": 1.0822640657424927, "reward_std": 0.24437329173088074, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6110738515853882, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8647211790084839, "step": 98 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.55, "calib/avg_num_step_conf": 3.64453125, "calib/ece": 0.22842519685039375, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005057471264367813, "calib/mean_conf": 0.08653543307086614, "calib/mu_c": 0.09, "calib/mu_w": 0.08494252873563218, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.022303488401844006, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2316.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 618.5859375, "completions/mean_terminated_length": 623.4566650390625, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.1056, "grad_norm": 0.005430999211966991, "learning_rate": 2.805555555555556e-06, "loss": 0.0359, "num_tokens": 21619580.0, "reward": 1.0024843215942383, "reward_std": 0.18751585483551025, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7241469025611877, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9163306951522827, "step": 99 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5367610062893081, "calib/avg_num_step_conf": 3.6796875, "calib/ece": 0.33039062500000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0038767295597484652, "calib/mean_conf": 0.08367187500000002, "calib/mu_c": 0.08594339622641513, "calib/mu_w": 0.08206666666666666, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.024442889231520385, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1528.0, "completions/max_terminated_length": 1528.0, "completions/mean_length": 561.5234375, "completions/mean_terminated_length": 563.7255249023438, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.10666666666666667, "grad_norm": 0.005294025409966707, "learning_rate": 2.7777777777777783e-06, "loss": 0.0466, "num_tokens": 21846418.0, "reward": 1.0728075504302979, "reward_std": 0.18506084382534027, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6495109796524048, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9359585046768188, "step": 100 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5729030856149501, "calib/avg_num_step_conf": 3.65625, "calib/ece": 0.23243137254901963, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007772707518470215, "calib/mean_conf": 0.07345098039215685, "calib/mu_c": 0.07884615384615383, "calib/mu_w": 0.07107344632768361, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.02616412281289482, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2482.0, "completions/max_terminated_length": 2482.0, "completions/mean_length": 620.73828125, "completions/mean_terminated_length": 623.172607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.10773333333333333, "grad_norm": 0.00502887275069952, "learning_rate": 2.7500000000000004e-06, "loss": 0.0173, "num_tokens": 22087999.0, "reward": 0.9983644485473633, "reward_std": 0.20291808247566223, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7256336212158203, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9281280040740967, "step": 101 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6437863096741602, "calib/avg_num_step_conf": 3.74609375, "calib/ece": 0.5017254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01461038140944683, "calib/mean_conf": 0.07866666666666666, "calib/mu_c": 0.08479729729729729, "calib/mu_w": 0.07018691588785046, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.02501346042862481, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2390.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 516.31640625, "completions/mean_terminated_length": 518.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.1088, "grad_norm": 0.0057980152778327465, "learning_rate": 2.7222222222222224e-06, "loss": 0.0445, "num_tokens": 22302552.0, "reward": 1.1512010097503662, "reward_std": 0.16641102731227875, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5092281103134155, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8754100203514099, "step": 102 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6484340676583192, "calib/avg_num_step_conf": 3.4453125, "calib/ece": 0.37744094488188973, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01481265298437208, "calib/mean_conf": 0.06744094488188977, "calib/mu_c": 0.0756637168141593, "calib/mu_w": 0.06085106382978722, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.023945197780452628, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2455.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 612.11328125, "completions/mean_terminated_length": 614.5137329101562, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.10986666666666667, "grad_norm": 0.00619841692969203, "learning_rate": 2.6944444444444444e-06, "loss": 0.0553, "num_tokens": 22539485.0, "reward": 1.0578868389129639, "reward_std": 0.15986822545528412, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6124964952468872, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8440539836883545, "step": 103 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6046659867414584, "calib/avg_num_step_conf": 3.54296875, "calib/ece": 0.3460629921259843, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010466598674145844, "calib/mean_conf": 0.07125984251968505, "calib/mu_c": 0.07735849056603773, "calib/mu_w": 0.06689189189189189, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.024718641184790967, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2658.0, "completions/max_terminated_length": 2658.0, "completions/mean_length": 542.8828125, "completions/mean_terminated_length": 545.0117797851562, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.11093333333333333, "grad_norm": 0.006147816311568022, "learning_rate": 2.666666666666667e-06, "loss": 0.0104, "num_tokens": 22760823.0, "reward": 1.0557448863983154, "reward_std": 0.2222408950328827, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6210156083106995, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9340734481811523, "step": 104 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5702894088669951, "calib/avg_num_step_conf": 3.49609375, "calib/ece": 0.3773046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007169950738916256, "calib/mean_conf": 0.07582031250000001, "calib/mu_c": 0.07974137931034483, "calib/mu_w": 0.07257142857142858, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025266372660165206, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2389.0, "completions/max_terminated_length": 2389.0, "completions/mean_length": 529.30859375, "completions/mean_terminated_length": 531.3843383789062, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.112, "grad_norm": 0.005158624146133661, "learning_rate": 2.6388888888888893e-06, "loss": 0.0376, "num_tokens": 22977766.0, "reward": 1.0840234756469727, "reward_std": 0.1894109547138214, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6088863611221313, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9073833227157593, "step": 105 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5456932773109244, "calib/avg_num_step_conf": 3.20703125, "calib/ece": 0.3913725490196079, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00456932773109249, "calib/mean_conf": 0.07529411764705883, "calib/mu_c": 0.07773109243697483, "calib/mu_w": 0.07316176470588234, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.024998269836324447, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1906.0, "completions/max_terminated_length": 1906.0, "completions/mean_length": 476.98828125, "completions/mean_terminated_length": 478.8588562011719, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.11306666666666666, "grad_norm": 0.005500629544258118, "learning_rate": 2.6111111111111113e-06, "loss": -0.0615, "num_tokens": 23180139.0, "reward": 1.0853602886199951, "reward_std": 0.1865309774875641, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5926367044448853, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9014803171157837, "step": 106 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5015282107708294, "calib/avg_num_step_conf": 3.5234375, "calib/ece": 0.406015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 3.545448988326316e-05, "calib/mean_conf": 0.07445312500000001, "calib/mu_c": 0.07447154471544716, "calib/mu_w": 0.0744360902255639, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.025273769559255997, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1795.0, "completions/max_terminated_length": 1795.0, "completions/mean_length": 463.90234375, "completions/mean_terminated_length": 465.7215881347656, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.11413333333333334, "grad_norm": 0.005841091740876436, "learning_rate": 2.5833333333333337e-06, "loss": 0.0155, "num_tokens": 23379194.0, "reward": 1.1032767295837402, "reward_std": 0.22194012999534607, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5849117040634155, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9214085340499878, "step": 107 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5366594909459288, "calib/avg_num_step_conf": 3.3515625, "calib/ece": 0.5136470588235295, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0037241990629352656, "calib/mean_conf": 0.07066666666666666, "calib/mu_c": 0.07221476510067112, "calib/mu_w": 0.06849056603773586, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.02518844013312197, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2522.0, "completions/max_terminated_length": 2522.0, "completions/mean_length": 527.16796875, "completions/mean_terminated_length": 527.16796875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.1152, "grad_norm": 0.005249754525721073, "learning_rate": 2.5555555555555557e-06, "loss": 0.0701, "num_tokens": 23593061.0, "reward": 1.139378309249878, "reward_std": 0.20425044000148773, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.49251872301101685, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8459128141403198, "step": 108 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5496503496503496, "calib/avg_num_step_conf": 3.69921875, "calib/ece": 0.3754545454545455, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0055314685314685405, "calib/mean_conf": 0.05932806324110672, "calib/mu_c": 0.06245454545454546, "calib/mu_w": 0.056923076923076917, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.023370682856895336, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2558.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 519.0546875, "completions/mean_terminated_length": 521.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.11626666666666667, "grad_norm": 0.0058748298324644566, "learning_rate": 2.5277777777777778e-06, "loss": 0.0096, "num_tokens": 23806219.0, "reward": 1.050431728363037, "reward_std": 0.1639152467250824, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6082472801208496, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8711696267127991, "step": 109 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5218069488622462, "calib/avg_num_step_conf": 3.6171875, "calib/ece": 0.42566406249999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002355028137998545, "calib/mean_conf": 0.05089843750000001, "calib/mu_c": 0.052131147540983615, "calib/mu_w": 0.04977611940298507, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.018444740037707057, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 436.265625, "completions/mean_terminated_length": 437.97650146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.11733333333333333, "grad_norm": 0.006007987540215254, "learning_rate": 2.5e-06, "loss": -0.0107, "num_tokens": 23998503.0, "reward": 1.0971605777740479, "reward_std": 0.1857801228761673, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5701941251754761, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9420040249824524, "step": 110 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5945452254976064, "calib/avg_num_step_conf": 4.37890625, "calib/ece": 0.46710317460317463, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007698412698412694, "calib/mean_conf": 0.032896825396825397, "calib/mu_c": 0.036746031746031735, "calib/mu_w": 0.02904761904761904, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.020623737812299352, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2715.0, "completions/max_terminated_length": 2715.0, "completions/mean_length": 502.1328125, "completions/mean_terminated_length": 502.1328125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.1184, "grad_norm": 0.005854738410562277, "learning_rate": 2.4722222222222226e-06, "loss": 0.1053, "num_tokens": 24210137.0, "reward": 1.0687568187713623, "reward_std": 0.19579537212848663, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.526875376701355, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8587761521339417, "step": 111 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5454084221207509, "calib/avg_num_step_conf": 4.51171875, "calib/ece": 0.40416535433070866, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0035213089802130816, "calib/mean_conf": 0.02103149606299213, "calib/mu_c": 0.023055555555555548, "calib/mu_w": 0.019534246575342466, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.017747324078153975, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2758.0, "completions/max_terminated_length": 2758.0, "completions/mean_length": 525.72265625, "completions/mean_terminated_length": 527.7843627929688, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.11946666666666667, "grad_norm": 0.005254555959254503, "learning_rate": 2.4444444444444447e-06, "loss": 0.065, "num_tokens": 24428322.0, "reward": 1.0399373769760132, "reward_std": 0.16829246282577515, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5890142321586609, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8973459005355835, "step": 112 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5124275317626742, "calib/avg_num_step_conf": 5.69921875, "calib/ece": 0.4572078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007307265326261247, "calib/mean_conf": 0.01730196078431373, "calib/mu_c": 0.017685950413223142, "calib/mu_w": 0.016955223880597017, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0154875976971845, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2456.0, "completions/max_terminated_length": 2456.0, "completions/mean_length": 478.8515625, "completions/mean_terminated_length": 478.8515625, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.12053333333333334, "grad_norm": 0.006173197645694017, "learning_rate": 2.4166666666666667e-06, "loss": 0.0511, "num_tokens": 24631788.0, "reward": 1.0740957260131836, "reward_std": 0.19459447264671326, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5396190881729126, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9280819892883301, "step": 113 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5005526897568165, "calib/avg_num_step_conf": 5.55859375, "calib/ece": 0.52037890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00010476541390322391, "calib/mean_conf": 0.01868359375, "calib/mu_c": 0.018731884057971014, "calib/mu_w": 0.01862711864406779, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.016839009199919125, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1332.0, "completions/max_terminated_length": 1332.0, "completions/mean_length": 446.0625, "completions/mean_terminated_length": 447.8117980957031, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.1216, "grad_norm": 0.0068952166475355625, "learning_rate": 2.388888888888889e-06, "loss": 0.0109, "num_tokens": 24826684.0, "reward": 1.1039800643920898, "reward_std": 0.14307743310928345, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4801192879676819, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9009939432144165, "step": 114 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5522073412698412, "calib/avg_num_step_conf": 5.76953125, "calib/ece": 0.42344921874999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003957341269841274, "calib/mean_conf": 0.014050781250000005, "calib/mu_c": 0.01627678571428572, "calib/mu_w": 0.012319444444444445, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012486921258446713, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 490.46484375, "completions/mean_terminated_length": 492.3882751464844, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.12266666666666666, "grad_norm": 0.005587155930697918, "learning_rate": 2.361111111111111e-06, "loss": -0.0196, "num_tokens": 25033187.0, "reward": 1.0444157123565674, "reward_std": 0.17458747327327728, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5763888359069824, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8748853206634521, "step": 115 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5472972972972973, "calib/avg_num_step_conf": 6.546875, "calib/ece": 0.40129881889763774, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004526185619581846, "calib/mean_conf": 0.0160240157480315, "calib/mu_c": 0.018661320754716982, "calib/mu_w": 0.014135135135135136, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014862978302858687, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2513.0, "completions/max_terminated_length": 2513.0, "completions/mean_length": 537.7265625, "completions/mean_terminated_length": 539.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.12373333333333333, "grad_norm": 0.007462795823812485, "learning_rate": 2.3333333333333336e-06, "loss": 0.0444, "num_tokens": 25251045.0, "reward": 1.0313544273376465, "reward_std": 0.17312878370285034, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5931049585342407, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8860822319984436, "step": 116 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5420689655172414, "calib/avg_num_step_conf": 6.5390625, "calib/ece": 0.41648980392156865, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034036990595611317, "calib/mean_conf": 0.014882745098039217, "calib/mu_c": 0.016818181818181822, "calib/mu_w": 0.01341448275862069, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01337153899929005, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2354.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 501.64453125, "completions/mean_terminated_length": 503.6117858886719, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.1248, "grad_norm": 0.0062988935969769955, "learning_rate": 2.305555555555556e-06, "loss": 0.0245, "num_tokens": 25461746.0, "reward": 1.0523920059204102, "reward_std": 0.15046769380569458, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5804606676101685, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9314590692520142, "step": 117 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5618512110726643, "calib/avg_num_step_conf": 6.94921875, "calib/ece": 0.4460192156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0044582983193277165, "calib/mean_conf": 0.020647450980392158, "calib/mu_c": 0.0230252100840336, "calib/mu_w": 0.018566911764705884, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0176806214782015, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2538.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 506.53125, "completions/mean_terminated_length": 508.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.12586666666666665, "grad_norm": 0.008921671658754349, "learning_rate": 2.277777777777778e-06, "loss": 0.0534, "num_tokens": 25671106.0, "reward": 1.0745387077331543, "reward_std": 0.13702334463596344, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5519201755523682, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9208766222000122, "step": 118 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5277355623100304, "calib/avg_num_step_conf": 6.5625, "calib/ece": 0.4224110671936759, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0022270770010131696, "calib/mean_conf": 0.020276679841897235, "calib/mu_c": 0.021517857142857144, "calib/mu_w": 0.019290780141843974, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.017363884240948854, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1700.0, "completions/max_terminated_length": 1700.0, "completions/mean_length": 512.44921875, "completions/mean_terminated_length": 516.4842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.12693333333333334, "grad_norm": 0.008361544460058212, "learning_rate": 2.25e-06, "loss": -0.0055, "num_tokens": 25883037.0, "reward": 1.0460152626037598, "reward_std": 0.20650334656238556, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5649992227554321, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8946875929832458, "step": 119 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6579464505035617, "calib/avg_num_step_conf": 5.87890625, "calib/ece": 0.510078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012579218865143706, "calib/mean_conf": 0.028984375, "calib/mu_c": 0.034782608695652174, "calib/mu_w": 0.022203389830508468, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.019954629935415365, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1746.0, "completions/max_terminated_length": 1746.0, "completions/mean_length": 489.9375, "completions/mean_terminated_length": 491.8588562011719, "completions/min_length": 0.0, "completions/min_terminated_length": 215.0, "epoch": 0.128, "grad_norm": 0.00834466703236103, "learning_rate": 2.222222222222222e-06, "loss": 0.0239, "num_tokens": 26090829.0, "reward": 1.1241123676300049, "reward_std": 0.13478730618953705, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.4971992075443268, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9458011984825134, "step": 120 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5345997286295794, "calib/avg_num_step_conf": 5.79296875, "calib/ece": 0.4369019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002664980880720376, "calib/mean_conf": 0.037607843137254904, "calib/mu_c": 0.03900826446280992, "calib/mu_w": 0.03634328358208955, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0183046960988024, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2916.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 539.2265625, "completions/mean_terminated_length": 539.2265625, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.12906666666666666, "grad_norm": 0.006870029028505087, "learning_rate": 2.1944444444444445e-06, "loss": 0.0881, "num_tokens": 26309607.0, "reward": 1.0757176876068115, "reward_std": 0.20989492535591125, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5585699081420898, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8966686725616455, "step": 121 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4999684642068748, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.5431640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 2.2075055187621706e-05, "calib/mean_conf": 0.04667968750000001, "calib/mu_c": 0.04668874172185429, "calib/mu_w": 0.04666666666666667, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0108753114853021, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1019.0, "completions/max_terminated_length": 1019.0, "completions/mean_length": 452.5546875, "completions/mean_terminated_length": 454.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.13013333333333332, "grad_norm": 0.007138589397072792, "learning_rate": 2.166666666666667e-06, "loss": 0.0008, "num_tokens": 26508485.0, "reward": 1.1392134428024292, "reward_std": 0.1293630599975586, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.45904064178466797, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8809599876403809, "step": 122 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4844704844704845, "calib/avg_num_step_conf": 5.1015625, "calib/ece": 0.3895275590551181, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015220815220815184, "calib/mean_conf": 0.049055118110236225, "calib/mu_c": 0.048198198198198185, "calib/mu_w": 0.0497202797202797, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0007874015748031496, "calib/std_conf": 0.008365006870689341, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2300.0, "completions/max_terminated_length": 2300.0, "completions/mean_length": 531.0703125, "completions/mean_terminated_length": 533.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.1312, "grad_norm": 0.00916409119963646, "learning_rate": 2.138888888888889e-06, "loss": 0.0116, "num_tokens": 26725407.0, "reward": 1.0490834712982178, "reward_std": 0.1959851086139679, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5940663814544678, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8785138130187988, "step": 123 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4929886335557203, "calib/avg_num_step_conf": 5.17578125, "calib/ece": 0.5056640625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006683963429701192, "calib/mean_conf": 0.049804687500000014, "calib/mu_c": 0.049507042253521115, "calib/mu_w": 0.050175438596491234, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000390625, "calib/std_conf": 0.0047967413967550665, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 471.6328125, "completions/mean_terminated_length": 473.4823913574219, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.13226666666666667, "grad_norm": 0.007121821399778128, "learning_rate": 2.1111111111111114e-06, "loss": 0.0172, "num_tokens": 26928641.0, "reward": 1.133141279220581, "reward_std": 0.16863961517810822, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.4977308511734009, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9183536767959595, "step": 124 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4970704329591711, "calib/avg_num_step_conf": 4.9765625, "calib/ece": 0.4235294117647058, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002929567040828862, "calib/mean_conf": 0.050980392156862744, "calib/mu_c": 0.05082644628099174, "calib/mu_w": 0.05111940298507463, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006932419423397525, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1844.0, "completions/max_terminated_length": 1844.0, "completions/mean_length": 483.42578125, "completions/mean_terminated_length": 485.32159423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.13333333333333333, "grad_norm": 0.006879989989101887, "learning_rate": 2.0833333333333334e-06, "loss": -0.0187, "num_tokens": 27132886.0, "reward": 1.0873610973358154, "reward_std": 0.18790683150291443, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.56884765625, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9070613384246826, "step": 125 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.500062003968254, "calib/avg_num_step_conf": 5.10546875, "calib/ece": 0.4456692913385827, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 6.200396825382926e-06, "calib/mean_conf": 0.05039370078740157, "calib/mu_c": 0.05039682539682539, "calib/mu_w": 0.05039062500000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.0044192803780794595, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2469.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 479.2265625, "completions/mean_terminated_length": 479.2265625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.1344, "grad_norm": 0.006863424088805914, "learning_rate": 2.0555555555555555e-06, "loss": -0.0045, "num_tokens": 27336712.0, "reward": 1.0819272994995117, "reward_std": 0.1444890946149826, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5466894507408142, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8702676892280579, "step": 126 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5727709790209791, "calib/avg_num_step_conf": 4.91015625, "calib/ece": 0.383921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00727709790209792, "calib/mean_conf": 0.05529411764705882, "calib/mu_c": 0.05937500000000001, "calib/mu_w": 0.05209790209790209, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.015384349212496496, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2798.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 479.375, "completions/mean_terminated_length": 481.25494384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.13546666666666668, "grad_norm": 0.007512710057199001, "learning_rate": 2.027777777777778e-06, "loss": 0.058, "num_tokens": 27538784.0, "reward": 1.0702171325683594, "reward_std": 0.14376309514045715, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6072656512260437, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9178997278213501, "step": 127 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5271164021164021, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.37, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0027116402116402136, "calib/mean_conf": 0.053529411764705874, "calib/mu_c": 0.05509259259259259, "calib/mu_w": 0.052380952380952375, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01280678885710426, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 520.66015625, "completions/mean_terminated_length": 520.66015625, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 0.13653333333333334, "grad_norm": 0.006195978261530399, "learning_rate": 2.0000000000000003e-06, "loss": 0.0201, "num_tokens": 27754417.0, "reward": 1.0464067459106445, "reward_std": 0.18519645929336548, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6176855564117432, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8643181324005127, "step": 128 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48170731707317077, "calib/avg_num_step_conf": 4.8359375, "calib/ece": 0.45843137254901967, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0018292682926829285, "calib/mean_conf": 0.05921568627450981, "calib/mu_c": 0.05833333333333333, "calib/mu_w": 0.060162601626016256, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01938699152048357, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1233.0, "completions/max_terminated_length": 1233.0, "completions/mean_length": 448.078125, "completions/mean_terminated_length": 449.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.1376, "grad_norm": 0.007027975749224424, "learning_rate": 1.9722222222222224e-06, "loss": 0.0321, "num_tokens": 27947189.0, "reward": 1.118253469467163, "reward_std": 0.16204269230365753, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5367578268051147, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9229358434677124, "step": 129 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.550257486172039, "calib/avg_num_step_conf": 4.44921875, "calib/ece": 0.5155511811023622, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005025748617203882, "calib/mean_conf": 0.06318897637795276, "calib/mu_c": 0.06530612244897958, "calib/mu_w": 0.0602803738317757, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.02203405820541105, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 429.99609375, "completions/mean_terminated_length": 433.38189697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.13866666666666666, "grad_norm": 0.009456264786422253, "learning_rate": 1.944444444444445e-06, "loss": 0.017, "num_tokens": 28138236.0, "reward": 1.1353545188903809, "reward_std": 0.15697000920772552, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.488525390625, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8706176280975342, "step": 130 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6228411894559148, "calib/avg_num_step_conf": 4.30859375, "calib/ece": 0.3379446640316206, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01228411894559149, "calib/mean_conf": 0.06521739130434782, "calib/mu_c": 0.07254901960784313, "calib/mu_w": 0.06026490066225164, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.023006533139692094, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 468.35546875, "completions/mean_terminated_length": 470.19219970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.13973333333333332, "grad_norm": 0.009817316196858883, "learning_rate": 1.916666666666667e-06, "loss": 0.031, "num_tokens": 28340023.0, "reward": 1.059814453125, "reward_std": 0.13384954631328583, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6429296731948853, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9487107992172241, "step": 131 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.600626959247649, "calib/avg_num_step_conf": 4.3046875, "calib/ece": 0.49745098039215685, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010062695924764886, "calib/mean_conf": 0.07117647058823529, "calib/mu_c": 0.07551724137931035, "calib/mu_w": 0.06545454545454546, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.024705882352941175, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2602.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 474.27734375, "completions/mean_terminated_length": 474.27734375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.1408, "grad_norm": 0.012203056365251541, "learning_rate": 1.888888888888889e-06, "loss": 0.0832, "num_tokens": 28542710.0, "reward": 1.1420726776123047, "reward_std": 0.1849452704191208, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5095800757408142, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8850681781768799, "step": 132 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5585548172757474, "calib/avg_num_step_conf": 4.31640625, "calib/ece": 0.26777343750000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005855481727574739, "calib/mean_conf": 0.06035156250000001, "calib/mu_c": 0.06428571428571428, "calib/mu_w": 0.05843023255813954, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02025890616021985, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1436.0, "completions/max_terminated_length": 1436.0, "completions/mean_length": 509.28515625, "completions/mean_terminated_length": 511.2823791503906, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.14186666666666667, "grad_norm": 0.010452408343553543, "learning_rate": 1.8611111111111113e-06, "loss": 0.0137, "num_tokens": 28755111.0, "reward": 1.0223243236541748, "reward_std": 0.202122300863266, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.7100098133087158, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9567779302597046, "step": 133 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5752956216043463, "calib/avg_num_step_conf": 4.05078125, "calib/ece": 0.34685039370078746, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007529562160434655, "calib/mean_conf": 0.06653543307086614, "calib/mu_c": 0.07095238095238096, "calib/mu_w": 0.0634228187919463, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.023523416135889222, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2411.0, "completions/max_terminated_length": 2411.0, "completions/mean_length": 540.3515625, "completions/mean_terminated_length": 542.4706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.14293333333333333, "grad_norm": 0.011523211374878883, "learning_rate": 1.8333333333333333e-06, "loss": 0.0359, "num_tokens": 28978073.0, "reward": 1.058791160583496, "reward_std": 0.19045865535736084, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6352930068969727, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9270786046981812, "step": 134 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6085626911314984, "calib/avg_num_step_conf": 3.7421875, "calib/ece": 0.48539999999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010856269113149838, "calib/mean_conf": 0.0786, "calib/mu_c": 0.08333333333333331, "calib/mu_w": 0.07247706422018348, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.024739442192579852, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2987.0, "completions/max_terminated_length": 2987.0, "completions/mean_length": 527.0390625, "completions/mean_terminated_length": 529.1058959960938, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.144, "grad_norm": 0.010717969387769699, "learning_rate": 1.8055555555555557e-06, "loss": 0.0246, "num_tokens": 29194555.0, "reward": 1.1342743635177612, "reward_std": 0.2033892273902893, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5109472870826721, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9214526414871216, "step": 135 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6843548387096774, "calib/avg_num_step_conf": 4.29296875, "calib/ece": 0.31686274509803924, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01843548387096776, "calib/mean_conf": 0.07529411764705882, "calib/mu_c": 0.08650000000000002, "calib/mu_w": 0.06806451612903226, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.024998269836324447, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 515.93359375, "completions/mean_terminated_length": 515.93359375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.14506666666666668, "grad_norm": 0.013928011059761047, "learning_rate": 1.777777777777778e-06, "loss": 0.0128, "num_tokens": 29410802.0, "reward": 1.0524232387542725, "reward_std": 0.15639856457710266, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6629101634025574, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9244980216026306, "step": 136 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6169230769230769, "calib/avg_num_step_conf": 4.39453125, "calib/ece": 0.427843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01169230769230764, "calib/mean_conf": 0.08196078431372548, "calib/mu_c": 0.08769230769230768, "calib/mu_w": 0.07600000000000004, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.024011403160535898, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1710.0, "completions/max_terminated_length": 1710.0, "completions/mean_length": 449.671875, "completions/mean_terminated_length": 451.4353332519531, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.14613333333333334, "grad_norm": 0.013641622848808765, "learning_rate": 1.75e-06, "loss": 0.0243, "num_tokens": 29608582.0, "reward": 1.1266405582427979, "reward_std": 0.1831943392753601, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5700781345367432, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9367181658744812, "step": 137 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6876623376623376, "calib/avg_num_step_conf": 4.54296875, "calib/ece": 0.5299212598425197, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018766233766233764, "calib/mean_conf": 0.0763779527559055, "calib/mu_c": 0.08376623376623377, "calib/mu_w": 0.065, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02496199603802734, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2313.0, "completions/max_terminated_length": 2313.0, "completions/mean_length": 482.6953125, "completions/mean_terminated_length": 482.6953125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.1472, "grad_norm": 0.008644962683320045, "learning_rate": 1.7222222222222224e-06, "loss": 0.0417, "num_tokens": 29812168.0, "reward": 1.1777849197387695, "reward_std": 0.18831929564476013, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.48500001430511475, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9223892688751221, "step": 138 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6052830188679246, "calib/avg_num_step_conf": 4.6875, "calib/ece": 0.504296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010528301886792449, "calib/mean_conf": 0.08164062500000001, "calib/mu_c": 0.086, "calib/mu_w": 0.07547169811320754, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02410191070453492, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 970.0, "completions/max_terminated_length": 970.0, "completions/mean_length": 436.7734375, "completions/mean_terminated_length": 438.4862976074219, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.14826666666666666, "grad_norm": 0.011200047098100185, "learning_rate": 1.6944444444444446e-06, "loss": -0.0204, "num_tokens": 30002758.0, "reward": 1.1692848205566406, "reward_std": 0.10654379427433014, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5075976848602295, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.918194055557251, "step": 139 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6192922374429224, "calib/avg_num_step_conf": 4.94140625, "calib/ece": 0.4887795275590552, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011929223744292228, "calib/mean_conf": 0.08602362204724409, "calib/mu_c": 0.09109589041095889, "calib/mu_w": 0.07916666666666666, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.022438354595636326, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1251.0, "completions/max_terminated_length": 1251.0, "completions/mean_length": 457.515625, "completions/mean_terminated_length": 459.3098449707031, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.14933333333333335, "grad_norm": 0.01058944035321474, "learning_rate": 1.6666666666666667e-06, "loss": -0.0054, "num_tokens": 30200578.0, "reward": 1.149709701538086, "reward_std": 0.1826654076576233, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5179394483566284, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8848344683647156, "step": 140 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.673841059602649, "calib/avg_num_step_conf": 5.08984375, "calib/ece": 0.5114624505928854, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017384105960264892, "calib/mean_conf": 0.08537549407114625, "calib/mu_c": 0.09238410596026489, "calib/mu_w": 0.075, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.022745309907310766, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1951.0, "completions/max_terminated_length": 1951.0, "completions/mean_length": 521.2265625, "completions/mean_terminated_length": 521.2265625, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 0.1504, "grad_norm": 0.009190862998366356, "learning_rate": 1.638888888888889e-06, "loss": 0.0585, "num_tokens": 30416788.0, "reward": 1.1587471961975098, "reward_std": 0.15836520493030548, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.49970701336860657, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8808870315551758, "step": 141 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6190078037904125, "calib/avg_num_step_conf": 5.375, "calib/ece": 0.3703921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011900780379041256, "calib/mean_conf": 0.0884313725490196, "calib/mu_c": 0.09487179487179487, "calib/mu_w": 0.08297101449275361, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02108549813140402, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1400.0, "completions/max_terminated_length": 1400.0, "completions/mean_length": 491.26171875, "completions/mean_terminated_length": 493.1882629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.15146666666666667, "grad_norm": 0.009988175705075264, "learning_rate": 1.6111111111111113e-06, "loss": -0.0049, "num_tokens": 30623391.0, "reward": 1.1010844707489014, "reward_std": 0.1659606695175171, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6175488233566284, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9426777362823486, "step": 142 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6052226870078741, "calib/avg_num_step_conf": 5.19921875, "calib/ece": 0.4064705882352942, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010522268700787388, "calib/mean_conf": 0.09156862745098039, "calib/mu_c": 0.0968503937007874, "calib/mu_w": 0.086328125, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01872112668592943, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2210.0, "completions/max_terminated_length": 2210.0, "completions/mean_length": 483.2109375, "completions/mean_terminated_length": 485.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.15253333333333333, "grad_norm": 0.009511373937129974, "learning_rate": 1.5833333333333333e-06, "loss": -0.0132, "num_tokens": 30830109.0, "reward": 1.120365858078003, "reward_std": 0.13358232378959656, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5873925685882568, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.923865795135498, "step": 143 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5897100153295861, "calib/avg_num_step_conf": 4.921875, "calib/ece": 0.5052941176470589, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008971001532958586, "calib/mean_conf": 0.09078431372549019, "calib/mu_c": 0.0944078947368421, "calib/mu_w": 0.08543689320388351, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01938699152048357, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 467.51953125, "completions/mean_terminated_length": 469.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.1536, "grad_norm": 0.016215428709983826, "learning_rate": 1.5555555555555558e-06, "loss": 0.0181, "num_tokens": 31029602.0, "reward": 1.1693274974822998, "reward_std": 0.17095816135406494, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5058691501617432, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8921340107917786, "step": 144 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5698763955342903, "calib/avg_num_step_conf": 4.84375, "calib/ece": 0.5119521912350598, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006987639553429029, "calib/mean_conf": 0.09362549800796813, "calib/mu_c": 0.09638157894736842, "calib/mu_w": 0.08939393939393939, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.016676055407534942, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 518.86328125, "completions/mean_terminated_length": 525.0158081054688, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.15466666666666667, "grad_norm": 0.01040293276309967, "learning_rate": 1.527777777777778e-06, "loss": -0.0028, "num_tokens": 31240815.0, "reward": 1.1662088632583618, "reward_std": 0.21874991059303284, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.4884374737739563, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9223355054855347, "step": 145 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.542289523069954, "calib/avg_num_step_conf": 4.6875, "calib/ece": 0.3021653543307087, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004228952306995382, "calib/mean_conf": 0.09547244094488189, "calib/mu_c": 0.09801980198019798, "calib/mu_w": 0.0937908496732026, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.014348489877277104, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1820.0, "completions/max_terminated_length": 1820.0, "completions/mean_length": 486.7421875, "completions/mean_terminated_length": 488.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.15573333333333333, "grad_norm": 0.010282909497618675, "learning_rate": 1.5e-06, "loss": 0.0027, "num_tokens": 31448317.0, "reward": 1.0654511451721191, "reward_std": 0.16977518796920776, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6657519936561584, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9553008675575256, "step": 146 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5517878348067027, "calib/avg_num_step_conf": 4.5625, "calib/ece": 0.33433734939759036, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0051787834806703015, "calib/mean_conf": 0.09136546184738956, "calib/mu_c": 0.09433962264150944, "calib/mu_w": 0.08916083916083914, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.018898985647955734, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2738.0, "completions/max_terminated_length": 2738.0, "completions/mean_length": 544.08203125, "completions/mean_terminated_length": 546.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.1568, "grad_norm": 0.009104374796152115, "learning_rate": 1.4722222222222225e-06, "loss": -0.0059, "num_tokens": 31666962.0, "reward": 1.0537787675857544, "reward_std": 0.1528664082288742, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6282519698143005, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9132986068725586, "step": 147 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6466361394857345, "calib/avg_num_step_conf": 4.6484375, "calib/ece": 0.5700396825396825, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014663613948573448, "calib/mean_conf": 0.09265873015873016, "calib/mu_c": 0.09760479041916166, "calib/mu_w": 0.08294117647058821, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.01769658863117844, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2371.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 477.36328125, "completions/mean_terminated_length": 481.1220397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.15786666666666666, "grad_norm": 0.008585936389863491, "learning_rate": 1.4444444444444445e-06, "loss": -0.0094, "num_tokens": 31869959.0, "reward": 1.1949286460876465, "reward_std": 0.1767604947090149, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.450615257024765, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8753587007522583, "step": 148 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6027783112476794, "calib/avg_num_step_conf": 4.734375, "calib/ece": 0.41659999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01027783112476792, "calib/mean_conf": 0.09140000000000001, "calib/mu_c": 0.09645669291338581, "calib/mu_w": 0.08617886178861789, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.018869022232219666, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1989.0, "completions/max_terminated_length": 1989.0, "completions/mean_length": 536.59375, "completions/mean_terminated_length": 538.6980590820312, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.15893333333333334, "grad_norm": 0.011967609636485577, "learning_rate": 1.4166666666666667e-06, "loss": 0.0278, "num_tokens": 32087463.0, "reward": 1.0985041856765747, "reward_std": 0.24534091353416443, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.563769519329071, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8930402398109436, "step": 149 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.592723880597015, "calib/avg_num_step_conf": 4.67578125, "calib/ece": 0.4364173228346457, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009272388059701445, "calib/mean_conf": 0.09114173228346456, "calib/mu_c": 0.09552238805970147, "calib/mu_w": 0.08625000000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.019090428986509384, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 437.40234375, "completions/mean_terminated_length": 440.8464660644531, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.16, "grad_norm": 0.015229527838528156, "learning_rate": 1.3888888888888892e-06, "loss": -0.0326, "num_tokens": 32280078.0, "reward": 1.1190340518951416, "reward_std": 0.18449994921684265, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5601464509963989, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8652180433273315, "step": 150 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5941932624113475, "calib/avg_num_step_conf": 4.95703125, "calib/ece": 0.35731225296442687, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009419326241134784, "calib/mean_conf": 0.08537549407114625, "calib/mu_c": 0.09062500000000002, "calib/mu_w": 0.08120567375886524, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.022745309907310766, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2732.0, "completions/max_terminated_length": 2732.0, "completions/mean_length": 570.6171875, "completions/mean_terminated_length": 575.1102294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.16106666666666666, "grad_norm": 0.009542996063828468, "learning_rate": 1.3611111111111112e-06, "loss": 0.0044, "num_tokens": 32508860.0, "reward": 1.0749963521957397, "reward_std": 0.19749057292938232, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6223633289337158, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.909946084022522, "step": 151 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6332244218079313, "calib/avg_num_step_conf": 4.71875, "calib/ece": 0.4302, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013322442180793106, "calib/mean_conf": 0.08580000000000002, "calib/mu_c": 0.09224806201550387, "calib/mu_w": 0.07892561983471076, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.0, "calib/std_conf": 0.022546840133375674, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2797.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 519.3515625, "completions/mean_terminated_length": 529.6972045898438, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.16213333333333332, "grad_norm": 0.007988336496055126, "learning_rate": 1.3333333333333334e-06, "loss": -0.0165, "num_tokens": 32722886.0, "reward": 1.105271816253662, "reward_std": 0.20392175018787384, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5540722608566284, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9082551598548889, "step": 152 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.650771565899317, "calib/avg_num_step_conf": 5.35546875, "calib/ece": 0.45000000000000007, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015077156589931681, "calib/mean_conf": 0.08174603174603175, "calib/mu_c": 0.08880597014925372, "calib/mu_w": 0.07372881355932204, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.024072620457306508, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2220.0, "completions/max_terminated_length": 2220.0, "completions/mean_length": 540.6171875, "completions/mean_terminated_length": 542.7373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.1632, "grad_norm": 0.007903658784925938, "learning_rate": 1.3055555555555556e-06, "loss": 0.0549, "num_tokens": 32944284.0, "reward": 1.1284171342849731, "reward_std": 0.2020460069179535, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5463769435882568, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9349770545959473, "step": 153 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6056811048336472, "calib/avg_num_step_conf": 5.23828125, "calib/ece": 0.3851778656126483, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010568110483364729, "calib/mean_conf": 0.08122529644268774, "calib/mu_c": 0.08686440677966102, "calib/mu_w": 0.07629629629629629, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.024212510902437596, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2548.0, "completions/max_terminated_length": 2548.0, "completions/mean_length": 523.140625, "completions/mean_terminated_length": 529.3438720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.16426666666666667, "grad_norm": 0.007686985656619072, "learning_rate": 1.2777777777777779e-06, "loss": -0.0481, "num_tokens": 33158328.0, "reward": 1.0843218564987183, "reward_std": 0.13767561316490173, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5995800495147705, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9006271958351135, "step": 154 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6250559284116332, "calib/avg_num_step_conf": 5.55078125, "calib/ece": 0.33405511811023625, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01250559284116333, "calib/mean_conf": 0.07933070866141732, "calib/mu_c": 0.08666666666666668, "calib/mu_w": 0.07416107382550335, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.024622042207947028, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2457.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 490.5546875, "completions/mean_terminated_length": 492.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.16533333333333333, "grad_norm": 0.01461772620677948, "learning_rate": 1.25e-06, "loss": 0.0216, "num_tokens": 33366806.0, "reward": 1.0643209218978882, "reward_std": 0.17543260753154755, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6423828601837158, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9365804195404053, "step": 155 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6879252752690832, "calib/avg_num_step_conf": 6.046875, "calib/ece": 0.3847058823529412, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018792527526908312, "calib/mean_conf": 0.0780392156862745, "calib/mu_c": 0.08813559322033897, "calib/mu_w": 0.06934306569343066, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.024814575716951177, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2267.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 521.125, "completions/mean_terminated_length": 523.1686401367188, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.1664, "grad_norm": 0.011230715550482273, "learning_rate": 1.2222222222222223e-06, "loss": -0.0324, "num_tokens": 33580654.0, "reward": 1.099372148513794, "reward_std": 0.17626884579658508, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6097265481948853, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9358479976654053, "step": 156 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6294715579592102, "calib/avg_num_step_conf": 5.67578125, "calib/ece": 0.5146825396825397, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012947155795921025, "calib/mean_conf": 0.07658730158730159, "calib/mu_c": 0.08187919463087247, "calib/mu_w": 0.06893203883495144, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.024949558586695474, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1704.0, "completions/max_terminated_length": 1704.0, "completions/mean_length": 505.3359375, "completions/mean_terminated_length": 511.3280944824219, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.16746666666666668, "grad_norm": 0.011182314716279507, "learning_rate": 1.1944444444444446e-06, "loss": -0.0221, "num_tokens": 33789428.0, "reward": 1.1364257335662842, "reward_std": 0.16615410149097443, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.4912695288658142, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8412890434265137, "step": 157 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5744284954811271, "calib/avg_num_step_conf": 6.3046875, "calib/ece": 0.5243027888446214, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007442849548112723, "calib/mean_conf": 0.08127490039840637, "calib/mu_c": 0.08421052631578947, "calib/mu_w": 0.07676767676767675, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.024199702993840225, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 506.890625, "completions/mean_terminated_length": 508.8784484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.16853333333333334, "grad_norm": 0.017442265525460243, "learning_rate": 1.1666666666666668e-06, "loss": -0.007, "num_tokens": 34000112.0, "reward": 1.1422319412231445, "reward_std": 0.22000621259212494, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.46803712844848633, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8703532218933105, "step": 158 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6704815509693559, "calib/avg_num_step_conf": 6.1953125, "calib/ece": 0.43596837944664035, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017048155096935566, "calib/mean_conf": 0.07786561264822134, "calib/mu_c": 0.08615384615384615, "calib/mu_w": 0.06910569105691058, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.024835222248861675, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 550.7421875, "completions/mean_terminated_length": 552.9019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1696, "grad_norm": 0.009645085781812668, "learning_rate": 1.138888888888889e-06, "loss": 0.0177, "num_tokens": 34221566.0, "reward": 1.1186250448226929, "reward_std": 0.1828586310148239, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5613672137260437, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9252033829689026, "step": 159 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5874056662130397, "calib/avg_num_step_conf": 6.296875, "calib/ece": 0.4613725490196079, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008740566621303958, "calib/mean_conf": 0.07588235294117644, "calib/mu_c": 0.07992700729927006, "calib/mu_w": 0.0711864406779661, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.024984424213641537, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1610.0, "completions/max_terminated_length": 1610.0, "completions/mean_length": 515.1640625, "completions/mean_terminated_length": 517.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 248.0, "epoch": 0.17066666666666666, "grad_norm": 0.013785509392619133, "learning_rate": 1.111111111111111e-06, "loss": 0.0145, "num_tokens": 34433968.0, "reward": 1.1234824657440186, "reward_std": 0.20462122559547424, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5362597703933716, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8839104175567627, "step": 160 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5733961983961984, "calib/avg_num_step_conf": 6.0546875, "calib/ece": 0.6273437499999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008286308286308275, "calib/mean_conf": 0.08359375000000002, "calib/mu_c": 0.08598901098901097, "calib/mu_w": 0.0777027027027027, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.027321419453196424, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1802.0, "completions/max_terminated_length": 1802.0, "completions/mean_length": 488.19921875, "completions/mean_terminated_length": 490.11376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.17173333333333332, "grad_norm": 0.018429474905133247, "learning_rate": 1.0833333333333335e-06, "loss": 0.0109, "num_tokens": 34638547.0, "reward": 1.231764793395996, "reward_std": 0.16334450244903564, "rewards/accuracy_reward_step": 0.7109375, "rewards/final_brier_reward_step": 0.4035937488079071, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8761218786239624, "step": 161 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5352941176470589, "calib/avg_num_step_conf": 5.484375, "calib/ece": 0.5807843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003529411764705878, "calib/mean_conf": 0.08588235294117647, "calib/mu_c": 0.08705882352941176, "calib/mu_w": 0.08352941176470588, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.022507207611422345, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2149.0, "completions/max_terminated_length": 2149.0, "completions/mean_length": 479.75390625, "completions/mean_terminated_length": 481.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1728, "grad_norm": 0.016633324325084686, "learning_rate": 1.0555555555555557e-06, "loss": -0.0073, "num_tokens": 34841188.0, "reward": 1.1983163356781006, "reward_std": 0.14873743057250977, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.4359374940395355, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8682651519775391, "step": 162 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6971554993678887, "calib/avg_num_step_conf": 5.2265625, "calib/ece": 0.47430830039525695, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.019715549936788854, "calib/mean_conf": 0.07905138339920949, "calib/mu_c": 0.08785714285714284, "calib/mu_w": 0.06814159292035399, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.024669541798594676, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 536.28515625, "completions/mean_terminated_length": 540.5078735351562, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.17386666666666667, "grad_norm": 0.01776149682700634, "learning_rate": 1.0277777777777777e-06, "loss": 0.073, "num_tokens": 35058989.0, "reward": 1.14092218875885, "reward_std": 0.19047382473945618, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5307226777076721, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9194308519363403, "step": 163 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6394538606403014, "calib/avg_num_step_conf": 5.23828125, "calib/ece": 0.45454545454545453, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013945386064030105, "calib/mean_conf": 0.07905138339920949, "calib/mu_c": 0.08555555555555554, "calib/mu_w": 0.07161016949152543, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.024669541798594676, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2376.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 533.73046875, "completions/mean_terminated_length": 540.059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.17493333333333333, "grad_norm": 0.03917751833796501, "learning_rate": 1.0000000000000002e-06, "loss": -0.0335, "num_tokens": 35277440.0, "reward": 1.1271326541900635, "reward_std": 0.1549869328737259, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5443945527076721, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9150543212890625, "step": 164 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5979746367889682, "calib/avg_num_step_conf": 5.3203125, "calib/ece": 0.4349019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009797463678896806, "calib/mean_conf": 0.0788235294117647, "calib/mu_c": 0.08358778625954198, "calib/mu_w": 0.07379032258064518, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.024705882352941175, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1468.0, "completions/max_terminated_length": 1468.0, "completions/mean_length": 511.83984375, "completions/mean_terminated_length": 513.8471069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.176, "grad_norm": 0.013871337287127972, "learning_rate": 9.722222222222224e-07, "loss": -0.0228, "num_tokens": 35489727.0, "reward": 1.1261584758758545, "reward_std": 0.15966832637786865, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5631250143051147, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9330711364746094, "step": 165 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6554248366013071, "calib/avg_num_step_conf": 5.02734375, "calib/ece": 0.5213438735177865, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015542483660130738, "calib/mean_conf": 0.083399209486166, "calib/mu_c": 0.08954248366013072, "calib/mu_w": 0.07399999999999998, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.023546831634160444, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2577.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 540.76171875, "completions/mean_terminated_length": 545.0196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.17706666666666668, "grad_norm": 0.009842603467404842, "learning_rate": 9.444444444444445e-07, "loss": 0.0084, "num_tokens": 35710026.0, "reward": 1.168914794921875, "reward_std": 0.18666985630989075, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.48636719584465027, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.918549656867981, "step": 166 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6343370379943137, "calib/avg_num_step_conf": 5.1484375, "calib/ece": 0.49186507936507934, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01343370379943136, "calib/mean_conf": 0.0875, "calib/mu_c": 0.09315068493150684, "calib/mu_w": 0.07971698113207548, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.021650635094610966, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2314.0, "completions/max_terminated_length": 2314.0, "completions/mean_length": 508.32421875, "completions/mean_terminated_length": 510.31768798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.17813333333333334, "grad_norm": 0.009834786877036095, "learning_rate": 9.166666666666666e-07, "loss": -0.0031, "num_tokens": 35921445.0, "reward": 1.1382412910461426, "reward_std": 0.1689327359199524, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5123144388198853, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8533361554145813, "step": 167 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6398277437035104, "calib/avg_num_step_conf": 4.84375, "calib/ece": 0.535686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013982774370351031, "calib/mean_conf": 0.08392156862745097, "calib/mu_c": 0.08924050632911391, "calib/mu_w": 0.07525773195876288, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.023353920724916457, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1719.0, "completions/max_terminated_length": 1719.0, "completions/mean_length": 526.44140625, "completions/mean_terminated_length": 528.5059204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.1792, "grad_norm": 0.015063202939927578, "learning_rate": 8.88888888888889e-07, "loss": 0.0472, "num_tokens": 36136566.0, "reward": 1.1882625818252563, "reward_std": 0.19077295064926147, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.4815039038658142, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9228549003601074, "step": 168 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.633487603305785, "calib/avg_num_step_conf": 4.6796875, "calib/ece": 0.4065040650406504, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013348760330578524, "calib/mean_conf": 0.08536585365853659, "calib/mu_c": 0.09214876033057852, "calib/mu_w": 0.0788, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.0, "calib/std_conf": 0.022749705007533698, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2893.0, "completions/max_terminated_length": 2893.0, "completions/mean_length": 512.16015625, "completions/mean_terminated_length": 518.2332153320312, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.18026666666666666, "grad_norm": 0.009256192483007908, "learning_rate": 8.611111111111112e-07, "loss": -0.0376, "num_tokens": 36347543.0, "reward": 1.072580099105835, "reward_std": 0.18603584170341492, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5678906440734863, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8795391321182251, "step": 169 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6563919532770928, "calib/avg_num_step_conf": 4.8203125, "calib/ece": 0.4558232931726908, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01563919532770923, "calib/mean_conf": 0.08232931726907632, "calib/mu_c": 0.08955223880597012, "calib/mu_w": 0.07391304347826089, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.023901487576492388, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1667.0, "completions/max_terminated_length": 1667.0, "completions/mean_length": 535.40234375, "completions/mean_terminated_length": 546.0677490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.18133333333333335, "grad_norm": 0.016515735536813736, "learning_rate": 8.333333333333333e-07, "loss": -0.0212, "num_tokens": 36564438.0, "reward": 1.1249117851257324, "reward_std": 0.216035395860672, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5319238901138306, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.9389243721961975, "step": 170 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6654024957181306, "calib/avg_num_step_conf": 4.96484375, "calib/ece": 0.3921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016540249571813076, "calib/mean_conf": 0.08437500000000002, "calib/mu_c": 0.09303278688524591, "calib/mu_w": 0.07649253731343283, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02317562027217395, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 479.8828125, "completions/mean_terminated_length": 481.7647399902344, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.1824, "grad_norm": 0.013426011428236961, "learning_rate": 8.055555555555557e-07, "loss": 0.0009, "num_tokens": 36769864.0, "reward": 1.1131434440612793, "reward_std": 0.1605221927165985, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6044531464576721, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9374175667762756, "step": 171 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6706693919264181, "calib/avg_num_step_conf": 4.6171875, "calib/ece": 0.508235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017066939192641795, "calib/mean_conf": 0.08784313725490195, "calib/mu_c": 0.09473684210526315, "calib/mu_w": 0.07766990291262135, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.021448865365136436, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2084.0, "completions/max_terminated_length": 2084.0, "completions/mean_length": 481.19140625, "completions/mean_terminated_length": 483.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.18346666666666667, "grad_norm": 0.0074419486336410046, "learning_rate": 7.777777777777779e-07, "loss": -0.007, "num_tokens": 36972081.0, "reward": 1.182291030883789, "reward_std": 0.17799603939056396, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5066992044448853, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9423279762268066, "step": 172 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5288003051687965, "calib/avg_num_step_conf": 4.66796875, "calib/ece": 0.49015748031496065, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002880030516879614, "calib/mean_conf": 0.08858267716535433, "calib/mu_c": 0.08979591836734693, "calib/mu_w": 0.08691588785046732, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.020988351079152503, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2773.0, "completions/max_terminated_length": 2773.0, "completions/mean_length": 544.09765625, "completions/mean_terminated_length": 548.3818969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.18453333333333333, "grad_norm": 0.010779325850307941, "learning_rate": 7.5e-07, "loss": -0.0094, "num_tokens": 37190210.0, "reward": 1.1378142833709717, "reward_std": 0.20299270749092102, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5128710865974426, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8317651152610779, "step": 173 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5471066624960901, "calib/avg_num_step_conf": 5.0546875, "calib/ece": 0.4594488188976378, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004710666249609016, "calib/mean_conf": 0.08779527559055117, "calib/mu_c": 0.08992805755395683, "calib/mu_w": 0.08521739130434781, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.021477451491316312, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2617.0, "completions/max_terminated_length": 2617.0, "completions/mean_length": 532.14453125, "completions/mean_terminated_length": 532.14453125, "completions/min_length": 221.0, "completions/min_terminated_length": 221.0, "epoch": 0.1856, "grad_norm": 0.00913445558398962, "learning_rate": 7.222222222222222e-07, "loss": 0.0721, "num_tokens": 37406351.0, "reward": 1.1227368116378784, "reward_std": 0.21584494411945343, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5349023342132568, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.853955090045929, "step": 174 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6414965986394559, "calib/avg_num_step_conf": 4.6015625, "calib/ece": 0.3301587301587302, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01414965986394559, "calib/mean_conf": 0.0865079365079365, "calib/mu_c": 0.09476190476190476, "calib/mu_w": 0.08061224489795916, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.02219385945096756, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2684.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 541.32421875, "completions/mean_terminated_length": 549.9166870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.18666666666666668, "grad_norm": 0.009307211264967918, "learning_rate": 6.944444444444446e-07, "loss": -0.0549, "num_tokens": 37626434.0, "reward": 1.0552879571914673, "reward_std": 0.17625892162322998, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.644101619720459, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8985738158226013, "step": 175 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6590648854961833, "calib/avg_num_step_conf": 4.5703125, "calib/ece": 0.43486055776892435, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01590648854961829, "calib/mean_conf": 0.08705179282868526, "calib/mu_c": 0.0946564885496183, "calib/mu_w": 0.07875000000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.021903294035702726, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 489.55859375, "completions/mean_terminated_length": 491.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.18773333333333334, "grad_norm": 0.0212948489934206, "learning_rate": 6.666666666666667e-07, "loss": 0.0201, "num_tokens": 37831505.0, "reward": 1.1138497591018677, "reward_std": 0.18853960931301117, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5538281202316284, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9102426767349243, "step": 176 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6754429133858268, "calib/avg_num_step_conf": 4.56640625, "calib/ece": 0.41117647058823537, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01754429133858268, "calib/mean_conf": 0.0868627450980392, "calib/mu_c": 0.0956692913385827, "calib/mu_w": 0.07812500000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.022006255445644232, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 491.7578125, "completions/mean_terminated_length": 493.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.1888, "grad_norm": 0.008859474211931229, "learning_rate": 6.388888888888889e-07, "loss": -0.0356, "num_tokens": 38036907.0, "reward": 1.1183327436447144, "reward_std": 0.14114660024642944, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5869238376617432, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9166704416275024, "step": 177 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6448474126492703, "calib/avg_num_step_conf": 4.46484375, "calib/ece": 0.43988095238095243, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014484741264927006, "calib/mean_conf": 0.08789682539682539, "calib/mu_c": 0.09473684210526313, "calib/mu_w": 0.08025210084033613, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.02141662659439622, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2612.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 481.703125, "completions/mean_terminated_length": 485.4960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.18986666666666666, "grad_norm": 0.01023717038333416, "learning_rate": 6.111111111111112e-07, "loss": 0.015, "num_tokens": 38241975.0, "reward": 1.1178292036056519, "reward_std": 0.16584745049476624, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.555224597454071, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.888992428779602, "step": 178 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6491150442477875, "calib/avg_num_step_conf": 4.49609375, "calib/ece": 0.4650197628458499, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014911504424778732, "calib/mean_conf": 0.0883399209486166, "calib/mu_c": 0.09499999999999997, "calib/mu_w": 0.08008849557522124, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.021143474385366755, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2559.0, "completions/max_terminated_length": 2559.0, "completions/mean_length": 517.0234375, "completions/mean_terminated_length": 517.0234375, "completions/min_length": 244.0, "completions/min_terminated_length": 244.0, "epoch": 0.19093333333333334, "grad_norm": 0.01045550312846899, "learning_rate": 5.833333333333334e-07, "loss": 0.0674, "num_tokens": 38456277.0, "reward": 1.1350390911102295, "reward_std": 0.21084295213222504, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5332910418510437, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8923243880271912, "step": 179 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.643921568627451, "calib/avg_num_step_conf": 4.4921875, "calib/ece": 0.5087301587301587, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014392156862745087, "calib/mean_conf": 0.08650793650793652, "calib/mu_c": 0.09233333333333331, "calib/mu_w": 0.07794117647058822, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.022193859450967565, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2197.0, "completions/max_terminated_length": 2197.0, "completions/mean_length": 561.06640625, "completions/mean_terminated_length": 563.2667236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.192, "grad_norm": 0.011729094199836254, "learning_rate": 5.555555555555555e-07, "loss": 0.0002, "num_tokens": 38679446.0, "reward": 1.1534072160720825, "reward_std": 0.16474701464176178, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.49878910183906555, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.87855064868927, "step": 180 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5728070175438597, "calib/avg_num_step_conf": 4.71875, "calib/ece": 0.4594488188976378, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007280701754385943, "calib/mean_conf": 0.09173228346456692, "calib/mu_c": 0.09499999999999997, "calib/mu_w": 0.08771929824561403, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01857500175131567, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 476.7734375, "completions/mean_terminated_length": 478.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.19306666666666666, "grad_norm": 0.01009936723858118, "learning_rate": 5.277777777777779e-07, "loss": -0.0001, "num_tokens": 38883444.0, "reward": 1.1534780263900757, "reward_std": 0.171014666557312, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.54052734375, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9484823942184448, "step": 181 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5915032679738562, "calib/avg_num_step_conf": 4.6328125, "calib/ece": 0.5131372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00915032679738563, "calib/mean_conf": 0.0868627450980392, "calib/mu_c": 0.09052287581699345, "calib/mu_w": 0.08137254901960782, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.022006255445644236, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2051.0, "completions/max_terminated_length": 2051.0, "completions/mean_length": 489.67578125, "completions/mean_terminated_length": 491.5960998535156, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.19413333333333332, "grad_norm": 0.011595308780670166, "learning_rate": 5.000000000000001e-07, "loss": -0.0103, "num_tokens": 39090641.0, "reward": 1.1820398569107056, "reward_std": 0.13127438724040985, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.49864256381988525, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9418116807937622, "step": 182 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6260573618821041, "calib/avg_num_step_conf": 4.69921875, "calib/ece": 0.5294466403162056, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012605736188210412, "calib/mean_conf": 0.08715415019762844, "calib/mu_c": 0.09198717948717948, "calib/mu_w": 0.07938144329896907, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.021846661826775464, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2897.0, "completions/max_terminated_length": 2897.0, "completions/mean_length": 544.23828125, "completions/mean_terminated_length": 550.6917114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.1952, "grad_norm": 0.02083410508930683, "learning_rate": 4.7222222222222226e-07, "loss": -0.0343, "num_tokens": 39312326.0, "reward": 1.1755211353302002, "reward_std": 0.20563694834709167, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.47916996479034424, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9124943017959595, "step": 183 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5434169278996865, "calib/avg_num_step_conf": 4.6640625, "calib/ece": 0.47843137254901963, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004341692789968668, "calib/mean_conf": 0.09019607843137255, "calib/mu_c": 0.09206896551724138, "calib/mu_w": 0.08772727272727271, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.019851428167957437, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2040.0, "completions/max_terminated_length": 2040.0, "completions/mean_length": 472.265625, "completions/mean_terminated_length": 474.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.19626666666666667, "grad_norm": 0.00886443629860878, "learning_rate": 4.444444444444445e-07, "loss": 0.0028, "num_tokens": 39514186.0, "reward": 1.1536989212036133, "reward_std": 0.1764037013053894, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5254883170127869, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8997563123703003, "step": 184 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6647456279809221, "calib/avg_num_step_conf": 4.3671875, "calib/ece": 0.4631578947368421, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01647456279809223, "calib/mean_conf": 0.08744939271255062, "calib/mu_c": 0.09485294117647058, "calib/mu_w": 0.07837837837837835, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.0, "calib/std_conf": 0.021679774470429618, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 1855.0, "completions/max_terminated_length": 1855.0, "completions/mean_length": 476.40234375, "completions/mean_terminated_length": 489.795166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.19733333333333333, "grad_norm": 0.01681617647409439, "learning_rate": 4.1666666666666667e-07, "loss": -0.0158, "num_tokens": 39718745.0, "reward": 1.1039948463439941, "reward_std": 0.17881864309310913, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5226464867591858, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8613112568855286, "step": 185 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6417481311098332, "calib/avg_num_step_conf": 4.25, "calib/ece": 0.4732142857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0141748131109833, "calib/mean_conf": 0.08630952380952381, "calib/mu_c": 0.09255319148936168, "calib/mu_w": 0.07837837837837838, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.022295620000390527, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2163.0, "completions/max_terminated_length": 2163.0, "completions/mean_length": 535.80859375, "completions/mean_terminated_length": 540.0275268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.1984, "grad_norm": 0.008796021342277527, "learning_rate": 3.8888888888888895e-07, "loss": 0.0204, "num_tokens": 39936632.0, "reward": 1.1320539712905884, "reward_std": 0.16783073544502258, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5277246236801147, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8758916258811951, "step": 186 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6205218929677134, "calib/avg_num_step_conf": 4.375, "calib/ece": 0.43948412698412703, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012052189296771293, "calib/mean_conf": 0.0882936507936508, "calib/mu_c": 0.093984962406015, "calib/mu_w": 0.08193277310924371, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.02117259664227484, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 525.6171875, "completions/mean_terminated_length": 533.9603271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.19946666666666665, "grad_norm": 0.014326175674796104, "learning_rate": 3.611111111111111e-07, "loss": 0.0024, "num_tokens": 40148414.0, "reward": 1.1215332746505737, "reward_std": 0.1880277693271637, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5543847680091858, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9054886102676392, "step": 187 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6488744989207524, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.465625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014887449892075216, "calib/mean_conf": 0.08515625000000002, "calib/mu_c": 0.09184397163120567, "calib/mu_w": 0.07695652173913045, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022844049245646012, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1808.0, "completions/max_terminated_length": 1808.0, "completions/mean_length": 540.26953125, "completions/mean_terminated_length": 542.3882446289062, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.20053333333333334, "grad_norm": 0.016202721744775772, "learning_rate": 3.3333333333333335e-07, "loss": 0.0409, "num_tokens": 40366475.0, "reward": 1.1602380275726318, "reward_std": 0.1599513441324234, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5426172018051147, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9525926113128662, "step": 188 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6365546218487395, "calib/avg_num_step_conf": 4.6171875, "calib/ece": 0.4470588235294118, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013655462184873915, "calib/mean_conf": 0.08627450980392157, "calib/mu_c": 0.09264705882352942, "calib/mu_w": 0.0789915966386555, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.022313346424982436, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1884.0, "completions/max_terminated_length": 1884.0, "completions/mean_length": 503.34375, "completions/mean_terminated_length": 505.31768798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.2016, "grad_norm": 0.009709118865430355, "learning_rate": 3.055555555555556e-07, "loss": 0.0062, "num_tokens": 40578779.0, "reward": 1.1314115524291992, "reward_std": 0.1800665259361267, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.555371105670929, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8914668560028076, "step": 189 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6638993710691824, "calib/avg_num_step_conf": 4.578125, "calib/ece": 0.500390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016389937106918204, "calib/mean_conf": 0.08554687500000002, "calib/mu_c": 0.09233333333333332, "calib/mu_w": 0.07594339622641512, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02266635011938126, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2194.0, "completions/max_terminated_length": 2194.0, "completions/mean_length": 525.00390625, "completions/mean_terminated_length": 527.0628051757812, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.20266666666666666, "grad_norm": 0.021329551935195923, "learning_rate": 2.7777777777777776e-07, "loss": 0.0263, "num_tokens": 40794468.0, "reward": 1.1731104850769043, "reward_std": 0.1828664243221283, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5144335627555847, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9198249578475952, "step": 190 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5998120300751879, "calib/avg_num_step_conf": 4.65234375, "calib/ece": 0.3582677165354331, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009981203007518827, "calib/mean_conf": 0.09055118110236221, "calib/mu_c": 0.09605263157894738, "calib/mu_w": 0.08607142857142855, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01957449274748985, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1578.0, "completions/max_terminated_length": 1578.0, "completions/mean_length": 480.76171875, "completions/mean_terminated_length": 482.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.20373333333333332, "grad_norm": 0.01765361987054348, "learning_rate": 2.5000000000000004e-07, "loss": -0.0135, "num_tokens": 40997391.0, "reward": 1.0853232145309448, "reward_std": 0.1648622453212738, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6239062547683716, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.91535484790802, "step": 191 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.643006993006993, "calib/avg_num_step_conf": 4.5859375, "calib/ece": 0.48122529644268774, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014300699300699243, "calib/mean_conf": 0.08399209486166008, "calib/mu_c": 0.0902097902097902, "calib/mu_w": 0.07590909090909095, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.023326856410560478, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2388.0, "completions/max_terminated_length": 2388.0, "completions/mean_length": 529.7734375, "completions/mean_terminated_length": 533.9448852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.2048, "grad_norm": 0.014357575215399265, "learning_rate": 2.2222222222222224e-07, "loss": 0.0052, "num_tokens": 41213669.0, "reward": 1.146650791168213, "reward_std": 0.19800010323524475, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5190625190734863, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9203532934188843, "step": 192 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5985507246376811, "calib/avg_num_step_conf": 4.6328125, "calib/ece": 0.45790513833992097, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009855072463768128, "calib/mean_conf": 0.0875494071146245, "calib/mu_c": 0.09202898550724638, "calib/mu_w": 0.08217391304347825, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.02162203461914285, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 518.34375, "completions/mean_terminated_length": 522.4251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 230.0, "epoch": 0.20586666666666667, "grad_norm": 0.012046567164361477, "learning_rate": 1.9444444444444447e-07, "loss": 0.0088, "num_tokens": 41427757.0, "reward": 1.1411765813827515, "reward_std": 0.2215532511472702, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5404003858566284, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.932343065738678, "step": 193 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6130128366473697, "calib/avg_num_step_conf": 4.60546875, "calib/ece": 0.45434782608695656, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011301283664736941, "calib/mean_conf": 0.08715415019762845, "calib/mu_c": 0.09233576642335765, "calib/mu_w": 0.08103448275862071, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.021846661826775464, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1896.0, "completions/max_terminated_length": 1896.0, "completions/mean_length": 485.59375, "completions/mean_terminated_length": 491.351806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.20693333333333333, "grad_norm": 0.011294323019683361, "learning_rate": 1.6666666666666668e-07, "loss": -0.0256, "num_tokens": 41633693.0, "reward": 1.1376094818115234, "reward_std": 0.15752781927585602, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5439746379852295, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9265509247779846, "step": 194 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6576485461441214, "calib/avg_num_step_conf": 4.5859375, "calib/ece": 0.4689723320158103, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015764854614412113, "calib/mean_conf": 0.08438735177865611, "calib/mu_c": 0.09142857142857141, "calib/mu_w": 0.0756637168141593, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.02317061990072259, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 529.19140625, "completions/mean_terminated_length": 531.2667236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.208, "grad_norm": 0.019001536071300507, "learning_rate": 1.3888888888888888e-07, "loss": 0.0243, "num_tokens": 41850830.0, "reward": 1.1309077739715576, "reward_std": 0.1469835638999939, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5338379144668579, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8731424808502197, "step": 195 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5560987684881491, "calib/avg_num_step_conf": 4.75, "calib/ece": 0.46386718750000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005609876848814865, "calib/mean_conf": 0.09472656250000001, "calib/mu_c": 0.09720279720279718, "calib/mu_w": 0.09159292035398231, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.015357823150876356, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1108.0, "completions/max_terminated_length": 1108.0, "completions/mean_length": 405.54296875, "completions/mean_terminated_length": 407.13336181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.20906666666666668, "grad_norm": 0.030068090185523033, "learning_rate": 1.1111111111111112e-07, "loss": -0.0074, "num_tokens": 42032873.0, "reward": 1.1597189903259277, "reward_std": 0.16683349013328552, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5407910346984863, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9229186177253723, "step": 196 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6479433878814684, "calib/avg_num_step_conf": 4.71484375, "calib/ece": 0.38591269841269843, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014794338788146844, "calib/mean_conf": 0.08630952380952381, "calib/mu_c": 0.09411764705882353, "calib/mu_w": 0.07932330827067668, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.022295620000390527, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2626.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 535.5078125, "completions/mean_terminated_length": 544.0079956054688, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.21013333333333334, "grad_norm": 0.013797251507639885, "learning_rate": 8.333333333333334e-08, "loss": -0.0004, "num_tokens": 42250699.0, "reward": 1.0935965776443481, "reward_std": 0.19439850747585297, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5953418016433716, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9321398138999939, "step": 197 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6408385093167702, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.46215686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014083850931677008, "calib/mean_conf": 0.0868627450980392, "calib/mu_c": 0.0932142857142857, "calib/mu_w": 0.07913043478260869, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.022006255445644236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1970.0, "completions/max_terminated_length": 1970.0, "completions/mean_length": 468.3046875, "completions/mean_terminated_length": 470.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.2112, "grad_norm": 0.02300938405096531, "learning_rate": 5.555555555555556e-08, "loss": 0.025, "num_tokens": 42451649.0, "reward": 1.145423173904419, "reward_std": 0.1318647563457489, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5424317121505737, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8968296647071838, "step": 198 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6243405945509837, "calib/avg_num_step_conf": 4.7109375, "calib/ece": 0.43169291338582677, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012434059455098359, "calib/mean_conf": 0.08405511811023621, "calib/mu_c": 0.09007633587786258, "calib/mu_w": 0.07764227642276422, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.023302464161750625, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 559.46484375, "completions/mean_terminated_length": 561.6588745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.21226666666666666, "grad_norm": 0.011882714927196503, "learning_rate": 2.777777777777778e-08, "loss": 0.0657, "num_tokens": 42674752.0, "reward": 1.123356580734253, "reward_std": 0.23471125960350037, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5651074647903442, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9194613099098206, "step": 199 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7657669519420671, "calib/avg_num_step_conf": 4.6171875, "calib/ece": 0.5300395256916997, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02657669519420669, "calib/mean_conf": 0.08260869565217391, "calib/mu_c": 0.09290322580645159, "calib/mu_w": 0.0663265306122449, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.02381402423935505, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2313.0, "completions/max_terminated_length": 2313.0, "completions/mean_length": 535.796875, "completions/mean_terminated_length": 540.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.21333333333333335, "grad_norm": 0.011222266592085361, "learning_rate": 0.0, "loss": 0.0136, "num_tokens": 42895644.0, "reward": 1.1725003719329834, "reward_std": 0.12921534478664398, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.4880078434944153, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8967982530593872, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.01614250882237684, "train_runtime": 9613.004, "train_samples_per_second": 5.326, "train_steps_per_second": 0.021 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 42895644, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }