{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.01953125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 584.16015625, "completions/mean_terminated_length": 667.6116333007812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.0, "learning_rate": 2.5000000000000004e-07, "loss": 0.0, "num_tokens": 232809.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 1 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.00390625, "calib/ece": 0.9, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.9, "calib/mu_c": NaN, "calib/mu_w": 0.9, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.00390625, "calib/pce": 0.9, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 643.5234375, "completions/mean_terminated_length": 725.7356567382812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 0.0, "num_tokens": 476519.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 2 }, { "calib/answer_extract_rate": 0.015625, "calib/avg_num_step_conf": 0.01953125, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.0078125, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 736.22265625, "completions/mean_terminated_length": 812.3836059570312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0032, "grad_norm": 0.0, "learning_rate": 7.5e-07, "loss": 0.0, "num_tokens": 745928.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 3 }, { "calib/answer_extract_rate": 0.0078125, "calib/avg_num_step_conf": 0.0, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0078125, "calib/nonempty_step_conf_rate": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 764.01953125, "completions/mean_terminated_length": 854.1004638671875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.004266666666666667, "grad_norm": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.0, "num_tokens": 1023365.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 4 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.046875, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.01171875, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 677.6640625, "completions/mean_terminated_length": 803.1574096679688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.005333333333333333, "grad_norm": 0.0, "learning_rate": 1.25e-06, "loss": 0.0, "num_tokens": 1279215.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 5 }, { "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.015625, "calib/ece": 0.595, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": -0.18999999999999995, "calib/mean_conf": 0.895, "calib/mu_c": 0.8, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.495, "calib/std_conf": 0.09499999999999997, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 629.1484375, "completions/mean_terminated_length": 715.8311157226562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.002231498947367072, "learning_rate": 1.5e-06, "loss": -0.002, "num_tokens": 1521909.0, "reward": 0.009758099913597107, "reward_std": 0.02760007604956627, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0038277343846857548, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.006376933306455612, "step": 6 }, { "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.595, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": -0.18999999999999995, "calib/mean_conf": 0.895, "calib/mu_c": 0.8, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.015625, "calib/pce": 0.495, "calib/std_conf": 0.09499999999999997, "calib/step_conf_rate": 0.015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2870.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 650.34375, "completions/mean_terminated_length": 743.2500610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.0029239056166261435, "learning_rate": 1.75e-06, "loss": 0.003, "num_tokens": 1771501.0, "reward": 0.0094246044754982, "reward_std": 0.026656806468963623, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0038277343846857548, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.005042951088398695, "step": 7 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.0078125, "calib/ece": 0.30000000000000004, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.7, "calib/mu_c": 0.7, "calib/mu_w": NaN, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 648.08203125, "completions/mean_terminated_length": 718.2207641601562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.0007931651780381799, "learning_rate": 2.0000000000000003e-06, "loss": 0.005, "num_tokens": 2019602.0, "reward": 0.007640509866178036, "reward_std": 0.021610625088214874, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.003554687602445483, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0031401650048792362, "step": 8 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.01171875, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2965.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 677.796875, "completions/mean_terminated_length": 747.913818359375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0096, "grad_norm": 0.0, "learning_rate": 2.25e-06, "loss": 0.0, "num_tokens": 2276334.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 9 }, { "calib/answer_extract_rate": 0.01953125, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.0234375, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 776.8125, "completions/mean_terminated_length": 872.2105102539062, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.010666666666666666, "grad_norm": 0.0, "learning_rate": 2.5e-06, "loss": 0.0, "num_tokens": 2557678.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 10 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.03515625, "calib/ece": 0.55, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.5, "calib/mean_conf": 0.55, "calib/mu_c": NaN, "calib/mu_w": 0.55, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.55, "calib/std_conf": 0.45, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 763.171875, "completions/mean_terminated_length": 853.15283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.011733333333333333, "grad_norm": 0.0, "learning_rate": 2.7500000000000004e-06, "loss": 0.0, "num_tokens": 2833210.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 11 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.44375000000000003, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.44375000000000003, "calib/mu_c": NaN, "calib/mu_w": 0.44375000000000003, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.44375000000000003, "calib/std_conf": 0.35625000000000007, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 722.77734375, "completions/mean_terminated_length": 807.9956665039062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.0, "learning_rate": 3e-06, "loss": 0.0, "num_tokens": 3098097.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 12 }, { "calib/answer_extract_rate": 0.02734375, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.03125, "calib/ece": 0.6, "calib/final_conf_rate": 0.0078125, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": -0.19999999999999996, "calib/mean_conf": 0.9, "calib/mu_c": 0.8, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.0078125, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.01171875, "calib/pce": 0.5, "calib/std_conf": 0.09999999999999998, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 639.03515625, "completions/mean_terminated_length": 702.1159057617188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.001644036383368075, "learning_rate": 3.2500000000000002e-06, "loss": 0.0008, "num_tokens": 3341962.0, "reward": 0.00390625, "reward_std": 0.011048543266952038, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 13 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.01171875, "calib/ece": 0.19999999999999996, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.8, "calib/mu_c": 0.8, "calib/mu_w": NaN, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 672.89453125, "completions/mean_terminated_length": 736.1581420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.014933333333333333, "grad_norm": 0.0014701585751026869, "learning_rate": 3.5e-06, "loss": 0.0094, "num_tokens": 3595303.0, "reward": 0.0078125, "reward_std": 0.022097086533904076, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 14 }, { "calib/answer_extract_rate": 0.0234375, "calib/avg_num_step_conf": 0.0078125, "calib/ece": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": 1.0, "calib/mu_w": NaN, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.02734375, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 710.86328125, "completions/mean_terminated_length": 771.10595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.0019930792041122913, "learning_rate": 3.7500000000000005e-06, "loss": -0.0004, "num_tokens": 3860844.0, "reward": 0.007889438420534134, "reward_std": 0.022314703091979027, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0034327523317188025, "step": 15 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.05078125, "calib/ece": 1.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 1.0, "calib/mu_c": NaN, "calib/mu_w": 1.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.03515625, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 1.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 760.90625, "completions/mean_terminated_length": 854.3508911132812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0, "num_tokens": 4140164.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 16 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.00390625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 655.0546875, "completions/mean_terminated_length": 735.5, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.018133333333333335, "grad_norm": 0.0, "learning_rate": 4.25e-06, "loss": 0.0, "num_tokens": 4387066.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 17 }, { "calib/answer_extract_rate": 0.02734375, "calib/avg_num_step_conf": 0.01953125, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.00390625, "calib/step_conf_rate": 0.00390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 736.14453125, "completions/mean_terminated_length": 805.354736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.0, "learning_rate": 4.5e-06, "loss": 0.0, "num_tokens": 4661919.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 18 }, { "calib/answer_extract_rate": 0.0390625, "calib/avg_num_step_conf": 0.0, "calib/ece": 0.0, "calib/final_conf_rate": 0.00390625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.0, "calib/mu_c": NaN, "calib/mu_w": 0.0, "calib/nonempty_final_conf_rate": 0.00390625, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.0, "calib/pce": 0.0, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 628.3046875, "completions/mean_terminated_length": 672.9957885742188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.020266666666666665, "grad_norm": 0.0, "learning_rate": 4.75e-06, "loss": 0.0, "num_tokens": 4903205.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 19 }, { "calib/answer_extract_rate": 0.0625, "calib/avg_num_step_conf": 0.01953125, "calib/ece": 0.85, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.75, "calib/mean_conf": 0.85, "calib/mu_c": NaN, "calib/mu_w": 0.85, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.85, "calib/std_conf": 0.203100960115899, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2848.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 650.07421875, "completions/mean_terminated_length": 717.3233032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.0009754901984706521, "learning_rate": 5e-06, "loss": 0.0065, "num_tokens": 5150176.0, "reward": 0.003434200072661042, "reward_std": 0.009713384322822094, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0029296875, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0031899246387183666, "step": 20 }, { "calib/answer_extract_rate": 0.05078125, "calib/auroc": 0.875, "calib/avg_num_step_conf": 0.04296875, "calib/ece": 0.5333333333333333, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": 0.20000000000000007, "calib/mean_conf": 0.8666666666666667, "calib/mu_c": 1.0, "calib/mu_w": 0.7999999999999999, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.5333333333333333, "calib/std_conf": 0.17950549357115014, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2931.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 675.6796875, "completions/mean_terminated_length": 720.7250366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0224, "grad_norm": 0.0026306321378797293, "learning_rate": 4.9722222222222224e-06, "loss": 0.0035, "num_tokens": 5401790.0, "reward": 0.021148759871721268, "reward_std": 0.05981772020459175, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.011484375223517418, "rewards/format_reward_step": 0.015625, "rewards/stepwise_brier_reward": 0.01162628922611475, "step": 21 }, { "calib/answer_extract_rate": 0.0625, "calib/avg_num_step_conf": 0.0078125, "calib/ece": 0.6666666666666666, "calib/final_conf_rate": 0.01171875, "calib/format_rate": 0.00390625, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/mean_conf": 0.6666666666666666, "calib/mu_c": NaN, "calib/mu_w": 0.6666666666666666, "calib/nonempty_final_conf_rate": 0.01171875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0078125, "calib/pce": 0.6666666666666666, "calib/std_conf": 0.4714045207910317, "calib/step_conf_rate": 0.0078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2723.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 566.3515625, "completions/mean_terminated_length": 641.5309448242188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.023466666666666667, "grad_norm": 0.0009022683370858431, "learning_rate": 4.944444444444445e-06, "loss": 0.0022, "num_tokens": 5624272.0, "reward": 0.004101060330867767, "reward_std": 0.011599550023674965, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.00390625, "rewards/format_reward_step": 0.00390625, "rewards/stepwise_brier_reward": 0.0039042423013597727, "step": 22 }, { "calib/answer_extract_rate": 0.03515625, "calib/avg_num_step_conf": 0.03515625, "calib/final_conf_rate": 0.0, "calib/format_rate": 0.0, "calib/nonempty_final_conf_rate": 0.0, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.01171875, "calib/step_conf_rate": 0.01171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2889.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 531.07421875, "completions/mean_terminated_length": 606.9420166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.0, "learning_rate": 4.9166666666666665e-06, "loss": 0.0, "num_tokens": 5839843.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 23 }, { "calib/answer_extract_rate": 0.0625, "calib/avg_num_step_conf": 0.07421875, "calib/ece": 0.6125, "calib/final_conf_rate": 0.015625, "calib/format_rate": 0.0, "calib/frac_conf_gt_0.9": 0.0, "calib/mean_conf": 0.6125, "calib/mu_c": NaN, "calib/mu_w": 0.6125, "calib/nonempty_final_conf_rate": 0.015625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.6125, "calib/std_conf": 0.3542156828826189, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 659.71875, "completions/mean_terminated_length": 718.6723022460938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0256, "grad_norm": 0.0, "learning_rate": 4.888888888888889e-06, "loss": 0.0, "num_tokens": 6088923.0, "reward": 0.0, "reward_std": 0.0, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0, "rewards/format_reward_step": 0.0, "rewards/stepwise_brier_reward": 0.0, "step": 24 }, { "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.08333333333333331, "calib/avg_num_step_conf": 0.109375, "calib/ece": 0.784375, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.125, "calib/gap": -0.6125, "calib/mean_conf": 0.609375, "calib/mu_c": 0.15, "calib/mu_w": 0.7625000000000001, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.571875, "calib/std_conf": 0.35353957964420335, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 618.3984375, "completions/mean_terminated_length": 656.8880004882812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.02666666666666667, "grad_norm": 0.0028888338711112738, "learning_rate": 4.861111111111111e-06, "loss": 0.0091, "num_tokens": 6326137.0, "reward": 0.015379859134554863, "reward_std": 0.043500810861587524, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.0011230468517169356, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.0030233401339501143, "step": 25 }, { "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 0.0859375, "calib/ece": 0.309, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.3, "calib/gap": 0.2141666666666665, "calib/mean_conf": 0.709, "calib/mu_c": 0.8374999999999999, "calib/mu_w": 0.6233333333333334, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.309, "calib/std_conf": 0.33738553614522365, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 659.8671875, "completions/mean_terminated_length": 706.8033447265625, "completions/min_length": 0.0, "completions/min_terminated_length": 9.0, "epoch": 0.027733333333333332, "grad_norm": 0.0023705705534666777, "learning_rate": 4.833333333333333e-06, "loss": -0.0318, "num_tokens": 6575983.0, "reward": 0.02112744003534317, "reward_std": 0.059757426381111145, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.0038277343846857548, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.004979296587407589, "step": 26 }, { "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.8214285714285714, "calib/avg_num_step_conf": 0.02734375, "calib/ece": 0.5401111111111111, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.0078125, "calib/frac_conf_gt_0.9": 0.2222222222222222, "calib/gap": 0.23871428571428566, "calib/mean_conf": 0.7623333333333334, "calib/mu_c": 0.948, "calib/mu_w": 0.7092857142857143, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.01953125, "calib/pce": 0.5401111111111111, "calib/std_conf": 0.26555434681268375, "calib/step_conf_rate": 0.01953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3041.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 663.38671875, "completions/mean_terminated_length": 687.5587158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0288, "grad_norm": 0.0021662807557731867, "learning_rate": 4.805555555555556e-06, "loss": 0.0148, "num_tokens": 6826706.0, "reward": 0.01795351132750511, "reward_std": 0.05078019201755524, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.005313574336469173, "rewards/format_reward_step": 0.0078125, "rewards/stepwise_brier_reward": 0.004936892073601484, "step": 27 }, { "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.9500000000000001, "calib/avg_num_step_conf": 0.10546875, "calib/ece": 0.5206666666666667, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.4166666666666667, "calib/gap": 0.36319999999999997, "calib/mean_conf": 0.6873333333333332, "calib/mu_c": 0.99, "calib/mu_w": 0.6268, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.5206666666666667, "calib/std_conf": 0.3450143314253611, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2855.0, "completions/max_terminated_length": 2855.0, "completions/mean_length": 608.6796875, "completions/mean_terminated_length": 663.0723266601562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.029866666666666666, "grad_norm": 0.003722765715792775, "learning_rate": 4.777777777777778e-06, "loss": 0.0091, "num_tokens": 7065152.0, "reward": 0.038684695959091187, "reward_std": 0.09774065762758255, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.014613499864935875, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.014574278146028519, "step": 28 }, { "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.7916666666666667, "calib/avg_num_step_conf": 0.16796875, "calib/ece": 0.7155692307692308, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.38461538461538464, "calib/gap": 0.1706333333333332, "calib/mean_conf": 0.7924923076923077, "calib/mu_c": 0.95, "calib/mu_w": 0.7793666666666668, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7155692307692308, "calib/std_conf": 0.234962682672208, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2535.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 609.9296875, "completions/mean_terminated_length": 642.5596313476562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.030933333333333334, "grad_norm": 0.0025090023409575224, "learning_rate": 4.75e-06, "loss": 0.0229, "num_tokens": 7304102.0, "reward": 0.022272439673542976, "reward_std": 0.05907098948955536, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.007529296912252903, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.014656167477369308, "step": 29 }, { "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.34375, "calib/ece": 0.46705882352941175, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.47058823529411764, "calib/gap": -0.04902777777777789, "calib/mean_conf": 0.8552941176470588, "calib/mu_c": 0.8322222222222222, "calib/mu_w": 0.8812500000000001, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.16796875, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.39647058823529413, "calib/std_conf": 0.1809490551332977, "calib/step_conf_rate": 0.09765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 691.58203125, "completions/mean_terminated_length": 750.190673828125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.003610975109040737, "learning_rate": 4.722222222222222e-06, "loss": 0.0245, "num_tokens": 7563811.0, "reward": 0.07396630942821503, "reward_std": 0.15866075456142426, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.022626563906669617, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.02717459946870804, "step": 30 }, { "calib/answer_extract_rate": 0.17578125, "calib/auroc": 0.615079365079365, "calib/avg_num_step_conf": 0.4375, "calib/ece": 0.5840185185185185, "calib/final_conf_rate": 0.10546875, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.37037037037037035, "calib/gap": 0.025309523809523893, "calib/mean_conf": 0.7469814814814814, "calib/mu_c": 0.7666666666666667, "calib/mu_w": 0.7413571428571428, "calib/nonempty_final_conf_rate": 0.10546875, "calib/nonempty_reasoning_rate": 0.21875, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.5543888888888888, "calib/std_conf": 0.2969400264139109, "calib/step_conf_rate": 0.125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 639.22265625, "completions/mean_terminated_length": 665.207275390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.03306666666666667, "grad_norm": 0.003975396975874901, "learning_rate": 4.694444444444445e-06, "loss": 0.0077, "num_tokens": 7809044.0, "reward": 0.06872516125440598, "reward_std": 0.15826590359210968, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.026396874338388443, "rewards/format_reward_step": 0.0546875, "rewards/stepwise_brier_reward": 0.03148187696933746, "step": 31 }, { "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.3935185185185185, "calib/avg_num_step_conf": 0.2109375, "calib/ece": 0.6371861111111111, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.25, "calib/gap": -0.09652222222222218, "calib/mean_conf": 0.7436472222222221, "calib/mu_c": 0.6712555555555556, "calib/mu_w": 0.7677777777777778, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.16796875, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.5654166666666667, "calib/std_conf": 0.23819251354902501, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2828.0, "completions/max_terminated_length": 2828.0, "completions/mean_length": 632.8515625, "completions/mean_terminated_length": 669.4627685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.034133333333333335, "grad_norm": 0.004196561872959137, "learning_rate": 4.666666666666667e-06, "loss": -0.015, "num_tokens": 8053438.0, "reward": 0.06246958300471306, "reward_std": 0.14494431018829346, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.019197266548871994, "rewards/format_reward_step": 0.03515625, "rewards/stepwise_brier_reward": 0.028671305626630783, "step": 32 }, { "calib/answer_extract_rate": 0.23828125, "calib/auroc": 0.27567567567567564, "calib/avg_num_step_conf": 0.33203125, "calib/ece": 0.696779761904762, "calib/final_conf_rate": 0.1640625, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/gap": -0.12012837837837842, "calib/mean_conf": 0.8158273809523809, "calib/mu_c": 0.71, "calib/mu_w": 0.8301283783783784, "calib/nonempty_final_conf_rate": 0.1640625, "calib/nonempty_reasoning_rate": 0.27734375, "calib/nonempty_step_conf_rate": 0.12890625, "calib/pce": 0.696779761904762, "calib/std_conf": 0.16470455591263616, "calib/step_conf_rate": 0.12890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2874.0, "completions/max_terminated_length": 2874.0, "completions/mean_length": 615.3125, "completions/mean_terminated_length": 661.8487548828125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0352, "grad_norm": 0.005306170787662268, "learning_rate": 4.638888888888889e-06, "loss": 0.0001, "num_tokens": 8293510.0, "reward": 0.0785440132021904, "reward_std": 0.17606551945209503, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.023517362773418427, "rewards/format_reward_step": 0.0703125, "rewards/stepwise_brier_reward": 0.04214133322238922, "step": 33 }, { "calib/answer_extract_rate": 0.33984375, "calib/auroc": 0.45524296675191817, "calib/avg_num_step_conf": 0.83984375, "calib/ece": 0.5635978835978837, "calib/final_conf_rate": 0.24609375, "calib/format_rate": 0.1484375, "calib/frac_conf_gt_0.9": 0.4603174603174603, "calib/gap": 0.010822676896845906, "calib/mean_conf": 0.8205291005291003, "calib/mu_c": 0.8284313725490197, "calib/mu_w": 0.8176086956521738, "calib/nonempty_final_conf_rate": 0.24609375, "calib/nonempty_reasoning_rate": 0.4140625, "calib/nonempty_step_conf_rate": 0.26171875, "calib/pce": 0.5571428571428572, "calib/std_conf": 0.22805689932454587, "calib/step_conf_rate": 0.26171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 594.96484375, "completions/mean_terminated_length": 624.2254028320312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.03626666666666667, "grad_norm": 0.006227437872439623, "learning_rate": 4.611111111111112e-06, "loss": 0.0756, "num_tokens": 8526613.0, "reward": 0.18159806728363037, "reward_std": 0.33083945512771606, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.06646484136581421, "rewards/format_reward_step": 0.1484375, "rewards/stepwise_brier_reward": 0.10283762216567993, "step": 34 }, { "calib/answer_extract_rate": 0.359375, "calib/auroc": 0.3589409722222222, "calib/avg_num_step_conf": 0.7578125, "calib/ece": 0.578890243902439, "calib/final_conf_rate": 0.3203125, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.2926829268292683, "calib/gap": -0.06477604166666662, "calib/mean_conf": 0.758890243902439, "calib/mu_c": 0.7083333333333334, "calib/mu_w": 0.773109375, "calib/nonempty_final_conf_rate": 0.3203125, "calib/nonempty_reasoning_rate": 0.40625, "calib/nonempty_step_conf_rate": 0.23046875, "calib/pce": 0.5591341463414634, "calib/std_conf": 0.2720138987705054, "calib/step_conf_rate": 0.23046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2935.0, "completions/max_terminated_length": 2935.0, "completions/mean_length": 694.01171875, "completions/mean_terminated_length": 722.2235717773438, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.037333333333333336, "grad_norm": 0.004874664358794689, "learning_rate": 4.583333333333333e-06, "loss": 0.0596, "num_tokens": 8789216.0, "reward": 0.1838405728340149, "reward_std": 0.3406270444393158, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.06637898087501526, "rewards/format_reward_step": 0.16015625, "rewards/stepwise_brier_reward": 0.09791683405637741, "step": 35 }, { "calib/answer_extract_rate": 0.59375, "calib/auroc": 0.5722870879120879, "calib/avg_num_step_conf": 1.77734375, "calib/ece": 0.5842804878048782, "calib/final_conf_rate": 0.48046875, "calib/format_rate": 0.22265625, "calib/frac_conf_gt_0.9": 0.3089430894308943, "calib/gap": 0.012974416208791473, "calib/mean_conf": 0.8021666666666667, "calib/mu_c": 0.811765625, "calib/mu_w": 0.7987912087912086, "calib/nonempty_final_conf_rate": 0.48046875, "calib/nonempty_reasoning_rate": 0.6875, "calib/nonempty_step_conf_rate": 0.3828125, "calib/pce": 0.5631422764227643, "calib/std_conf": 0.20696175824700988, "calib/step_conf_rate": 0.3828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2872.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 560.46484375, "completions/mean_terminated_length": 569.3611450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.0384, "grad_norm": 0.00626989733427763, "learning_rate": 4.555555555555556e-06, "loss": 0.0187, "num_tokens": 9011087.0, "reward": 0.3228808045387268, "reward_std": 0.4628177881240845, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.10468141734600067, "rewards/format_reward_step": 0.22265625, "rewards/stepwise_brier_reward": 0.15872274339199066, "step": 36 }, { "calib/answer_extract_rate": 0.5390625, "calib/auroc": 0.5586516853932585, "calib/avg_num_step_conf": 1.17578125, "calib/ece": 0.5739002923976608, "calib/final_conf_rate": 0.4453125, "calib/format_rate": 0.26953125, "calib/frac_conf_gt_0.9": 0.30701754385964913, "calib/gap": 0.05819513108614238, "calib/mean_conf": 0.7905669590643274, "calib/mu_c": 0.836, "calib/mu_w": 0.7778048689138576, "calib/nonempty_final_conf_rate": 0.4453125, "calib/nonempty_reasoning_rate": 0.64453125, "calib/nonempty_step_conf_rate": 0.41796875, "calib/pce": 0.5725845029239767, "calib/std_conf": 0.21448465470792466, "calib/step_conf_rate": 0.41796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 530.2578125, "completions/mean_terminated_length": 536.5454711914062, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.039466666666666664, "grad_norm": 0.007074254099279642, "learning_rate": 4.527777777777778e-06, "loss": 0.0148, "num_tokens": 9229609.0, "reward": 0.30819064378738403, "reward_std": 0.42235076427459717, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.12816710770130157, "rewards/format_reward_step": 0.26953125, "rewards/stepwise_brier_reward": 0.1842409372329712, "step": 37 }, { "calib/answer_extract_rate": 0.64453125, "calib/auroc": 0.4983548766157461, "calib/avg_num_step_conf": 1.26953125, "calib/ece": 0.5826423245614034, "calib/final_conf_rate": 0.59375, "calib/format_rate": 0.34765625, "calib/frac_conf_gt_0.9": 0.34210526315789475, "calib/gap": 0.00913895025460254, "calib/mean_conf": 0.8129054824561404, "calib/mu_c": 0.8198198198198199, "calib/mu_w": 0.8106808695652173, "calib/nonempty_final_conf_rate": 0.59375, "calib/nonempty_reasoning_rate": 0.70703125, "calib/nonempty_step_conf_rate": 0.43359375, "calib/pce": 0.5760633771929824, "calib/std_conf": 0.2098056093275935, "calib/step_conf_rate": 0.43359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 525.546875, "completions/mean_terminated_length": 536.0159301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.04053333333333333, "grad_norm": 0.006136655807495117, "learning_rate": 4.5e-06, "loss": 0.0025, "num_tokens": 9446717.0, "reward": 0.4005172848701477, "reward_std": 0.5092767477035522, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.17309755086898804, "rewards/format_reward_step": 0.34765625, "rewards/stepwise_brier_reward": 0.21368655562400818, "step": 38 }, { "calib/answer_extract_rate": 0.73046875, "calib/auroc": 0.5160208643815202, "calib/avg_num_step_conf": 1.578125, "calib/ece": 0.5831737634408604, "calib/final_conf_rate": 0.60546875, "calib/format_rate": 0.31640625, "calib/frac_conf_gt_0.9": 0.3548387096774194, "calib/gap": 0.035850190428878936, "calib/mean_conf": 0.781883440860215, "calib/mu_c": 0.8101010101010101, "calib/mu_w": 0.7742508196721312, "calib/nonempty_final_conf_rate": 0.60546875, "calib/nonempty_reasoning_rate": 0.796875, "calib/nonempty_step_conf_rate": 0.4375, "calib/pce": 0.5760769892473119, "calib/std_conf": 0.2403091633074413, "calib/step_conf_rate": 0.4375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2433.0, "completions/max_terminated_length": 2433.0, "completions/mean_length": 528.0625, "completions/mean_terminated_length": 530.1333618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.0416, "grad_norm": 0.00601692171767354, "learning_rate": 4.472222222222223e-06, "loss": -0.02, "num_tokens": 9663669.0, "reward": 0.36516085267066956, "reward_std": 0.4656815230846405, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.14888453483581543, "rewards/format_reward_step": 0.31640625, "rewards/stepwise_brier_reward": 0.2050618827342987, "step": 39 }, { "calib/answer_extract_rate": 0.69140625, "calib/auroc": 0.5190118152524168, "calib/avg_num_step_conf": 2.08203125, "calib/ece": 0.5816113095238096, "calib/final_conf_rate": 0.65625, "calib/format_rate": 0.41015625, "calib/frac_conf_gt_0.9": 0.2916666666666667, "calib/gap": 0.010224812030075303, "calib/mean_conf": 0.7725434523809525, "calib/mu_c": 0.7806380952380954, "calib/mu_w": 0.77041328320802, "calib/nonempty_final_conf_rate": 0.65625, "calib/nonempty_reasoning_rate": 0.80859375, "calib/nonempty_step_conf_rate": 0.55859375, "calib/pce": 0.5729107142857142, "calib/std_conf": 0.2338081530174817, "calib/step_conf_rate": 0.55859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2571.0, "completions/max_terminated_length": 2571.0, "completions/mean_length": 515.50390625, "completions/mean_terminated_length": 519.56298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.042666666666666665, "grad_norm": 0.005652237217873335, "learning_rate": 4.444444444444444e-06, "loss": 0.0625, "num_tokens": 9878078.0, "reward": 0.4190334677696228, "reward_std": 0.45665931701660156, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.17388327419757843, "rewards/format_reward_step": 0.41015625, "rewards/stepwise_brier_reward": 0.24242988228797913, "step": 40 }, { "calib/answer_extract_rate": 0.80078125, "calib/auroc": 0.5799282018794214, "calib/avg_num_step_conf": 1.85546875, "calib/ece": 0.4081126666666667, "calib/final_conf_rate": 0.78125, "calib/format_rate": 0.5, "calib/frac_conf_gt_0.9": 0.225, "calib/gap": 0.07252860310421272, "calib/mean_conf": 0.7779706666666667, "calib/mu_c": 0.8225757575757575, "calib/mu_w": 0.7500471544715448, "calib/nonempty_final_conf_rate": 0.78125, "calib/nonempty_reasoning_rate": 0.8671875, "calib/nonempty_step_conf_rate": 0.6015625, "calib/pce": 0.4005416666666667, "calib/std_conf": 0.2304185956654637, "calib/step_conf_rate": 0.6015625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 468.9921875, "completions/mean_terminated_length": 468.9921875, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.04373333333333333, "grad_norm": 0.006907930597662926, "learning_rate": 4.416666666666667e-06, "loss": 0.071, "num_tokens": 10081068.0, "reward": 0.6845973134040833, "reward_std": 0.6138066053390503, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.28266096115112305, "rewards/format_reward_step": 0.5, "rewards/stepwise_brier_reward": 0.3386923670768738, "step": 41 }, { "calib/answer_extract_rate": 0.875, "calib/auroc": 0.5038810604251254, "calib/avg_num_step_conf": 1.890625, "calib/ece": 0.5712902053712481, "calib/final_conf_rate": 0.82421875, "calib/format_rate": 0.55859375, "calib/frac_conf_gt_0.9": 0.2985781990521327, "calib/gap": 0.0056131040522252595, "calib/mean_conf": 0.8066930489731438, "calib/mu_c": 0.8108962264150945, "calib/mu_w": 0.8052831223628693, "calib/nonempty_final_conf_rate": 0.82421875, "calib/nonempty_reasoning_rate": 0.91015625, "calib/nonempty_step_conf_rate": 0.62890625, "calib/pce": 0.5633992101105845, "calib/std_conf": 0.20060389681325932, "calib/step_conf_rate": 0.62890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 411.25, "completions/mean_terminated_length": 416.1264953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.0448, "grad_norm": 0.007180627901107073, "learning_rate": 4.388888888888889e-06, "loss": -0.0196, "num_tokens": 10266396.0, "reward": 0.6159687042236328, "reward_std": 0.55170738697052, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.25868675112724304, "rewards/format_reward_step": 0.55859375, "rewards/stepwise_brier_reward": 0.35431385040283203, "step": 42 }, { "calib/answer_extract_rate": 0.875, "calib/auroc": 0.6032567049808429, "calib/avg_num_step_conf": 2.55859375, "calib/ece": 0.5645814307458142, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.6171875, "calib/frac_conf_gt_0.9": 0.228310502283105, "calib/gap": 0.07952490421455949, "calib/mean_conf": 0.7617047184170472, "calib/mu_c": 0.824888888888889, "calib/mu_w": 0.7453639846743295, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 0.92578125, "calib/nonempty_step_conf_rate": 0.7109375, "calib/pce": 0.5604033485540334, "calib/std_conf": 0.22752748859204217, "calib/step_conf_rate": 0.7109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 429.84765625, "completions/mean_terminated_length": 429.84765625, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.04586666666666667, "grad_norm": 0.0070141577161848545, "learning_rate": 4.361111111111112e-06, "loss": 0.0093, "num_tokens": 10457341.0, "reward": 0.6214162707328796, "reward_std": 0.550723671913147, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.31000176072120667, "rewards/format_reward_step": 0.6171875, "rewards/stepwise_brier_reward": 0.4062865376472473, "step": 43 }, { "calib/answer_extract_rate": 0.90625, "calib/auroc": 0.5537568306010929, "calib/avg_num_step_conf": 2.48046875, "calib/ece": 0.578165919282511, "calib/final_conf_rate": 0.87109375, "calib/format_rate": 0.6796875, "calib/frac_conf_gt_0.9": 0.24663677130044842, "calib/gap": 0.04479785974499084, "calib/mean_conf": 0.7552959641255605, "calib/mu_c": 0.7920583333333333, "calib/mu_w": 0.7472604735883425, "calib/nonempty_final_conf_rate": 0.87109375, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.7578125, "calib/pce": 0.5770448430493272, "calib/std_conf": 0.2368102410848513, "calib/step_conf_rate": 0.7578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 447.55078125, "completions/mean_terminated_length": 449.305908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.046933333333333334, "grad_norm": 0.006897160783410072, "learning_rate": 4.333333333333334e-06, "loss": 0.0952, "num_tokens": 10653914.0, "reward": 0.6419980525970459, "reward_std": 0.4891486167907715, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.324765682220459, "rewards/format_reward_step": 0.6796875, "rewards/stepwise_brier_reward": 0.41533565521240234, "step": 44 }, { "calib/answer_extract_rate": 0.8984375, "calib/auroc": 0.5399687395333259, "calib/avg_num_step_conf": 2.99609375, "calib/ece": 0.5009142177177177, "calib/final_conf_rate": 0.8671875, "calib/format_rate": 0.77734375, "calib/frac_conf_gt_0.9": 0.22972972972972974, "calib/gap": 0.05840739604034073, "calib/mean_conf": 0.7182419384384385, "calib/mu_c": 0.7627052264150942, "calib/mu_w": 0.7042978303747535, "calib/nonempty_final_conf_rate": 0.8671875, "calib/nonempty_reasoning_rate": 0.94921875, "calib/nonempty_step_conf_rate": 0.859375, "calib/pce": 0.4902087087087087, "calib/std_conf": 0.27369767059658495, "calib/step_conf_rate": 0.859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 435.97265625, "completions/mean_terminated_length": 435.97265625, "completions/min_length": 8.0, "completions/min_terminated_length": 8.0, "epoch": 0.048, "grad_norm": 0.006634102202951908, "learning_rate": 4.305555555555556e-06, "loss": 0.0477, "num_tokens": 10846251.0, "reward": 0.7824665904045105, "reward_std": 0.5265895128250122, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.4158342778682709, "rewards/format_reward_step": 0.77734375, "rewards/stepwise_brier_reward": 0.5216355323791504, "step": 45 }, { "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.4556926210955182, "calib/avg_num_step_conf": 3.5, "calib/ece": 0.5328596364235632, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.7890625, "calib/frac_conf_gt_0.9": 0.16170212765957448, "calib/gap": -0.03243677957200686, "calib/mean_conf": 0.7069770832320738, "calib/mu_c": 0.6810276595744682, "calib/mu_w": 0.7134644391464751, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.85546875, "calib/pce": 0.5199183598278184, "calib/std_conf": 0.26160604105809415, "calib/step_conf_rate": 0.85546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2550.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 444.10546875, "completions/mean_terminated_length": 444.10546875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.04906666666666667, "grad_norm": 0.006138576194643974, "learning_rate": 4.277777777777778e-06, "loss": 0.0768, "num_tokens": 11040390.0, "reward": 0.7579419016838074, "reward_std": 0.4797509014606476, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.39381933212280273, "rewards/format_reward_step": 0.7890625, "rewards/stepwise_brier_reward": 0.5316290259361267, "step": 46 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.534437146506112, "calib/avg_num_step_conf": 3.515625, "calib/ece": 0.516381856538678, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.8359375, "calib/frac_conf_gt_0.9": 0.22784810126582278, "calib/gap": 0.010183588755883877, "calib/mean_conf": 0.7540049226427566, "calib/mu_c": 0.7614814814761903, "calib/mu_w": 0.7512978927203064, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.90625, "calib/pce": 0.502281997185654, "calib/std_conf": 0.22681493785340529, "calib/step_conf_rate": 0.90625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2132.0, "completions/max_terminated_length": 2132.0, "completions/mean_length": 427.46484375, "completions/mean_terminated_length": 429.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.050133333333333335, "grad_norm": 0.006385274697095156, "learning_rate": 4.25e-06, "loss": 0.044, "num_tokens": 11231477.0, "reward": 0.8602737188339233, "reward_std": 0.5443920493125916, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.4434400200843811, "rewards/format_reward_step": 0.8359375, "rewards/stepwise_brier_reward": 0.5667149424552917, "step": 47 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5800638913846461, "calib/avg_num_step_conf": 3.41015625, "calib/ece": 0.4818471074380165, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.1446280991735537, "calib/gap": 0.08433752620545076, "calib/mean_conf": 0.7003595041322315, "calib/mu_c": 0.7662264150943398, "calib/mu_w": 0.681888888888889, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.48159917355371895, "calib/std_conf": 0.25700948404487006, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1335.0, "completions/max_terminated_length": 1335.0, "completions/mean_length": 395.3203125, "completions/mean_terminated_length": 396.87060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.0512, "grad_norm": 0.007040057796984911, "learning_rate": 4.222222222222223e-06, "loss": 0.0314, "num_tokens": 11412047.0, "reward": 0.8526144027709961, "reward_std": 0.5278269648551941, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.47880131006240845, "rewards/format_reward_step": 0.85546875, "rewards/stepwise_brier_reward": 0.5825426578521729, "step": 48 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.567470664928292, "calib/avg_num_step_conf": 3.8203125, "calib/ece": 0.4378973886583678, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.84375, "calib/frac_conf_gt_0.9": 0.0995850622406639, "calib/gap": 0.05385923586018515, "calib/mean_conf": 0.6501413388658368, "calib/mu_c": 0.6908151186440679, "calib/mu_w": 0.6369558827838827, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.42161272475795286, "calib/std_conf": 0.26861273252570295, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2198.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 411.94921875, "completions/mean_terminated_length": 411.94921875, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.05226666666666667, "grad_norm": 0.006596356630325317, "learning_rate": 4.194444444444445e-06, "loss": 0.0017, "num_tokens": 11597722.0, "reward": 0.8926715850830078, "reward_std": 0.5276837348937988, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.5090051293373108, "rewards/format_reward_step": 0.84375, "rewards/stepwise_brier_reward": 0.5714260339736938, "step": 49 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5872340425531916, "calib/avg_num_step_conf": 3.265625, "calib/ece": 0.4700798898071625, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.10330578512396695, "calib/gap": 0.07569354428077846, "calib/mean_conf": 0.6575179063360881, "calib/mu_c": 0.7185106382978724, "calib/mu_w": 0.642817094017094, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.4666914600550964, "calib/std_conf": 0.26108166766519925, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1155.0, "completions/max_terminated_length": 1155.0, "completions/mean_length": 442.37109375, "completions/mean_terminated_length": 444.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.05333333333333334, "grad_norm": 0.006352792028337717, "learning_rate": 4.166666666666667e-06, "loss": -0.049, "num_tokens": 11792009.0, "reward": 0.8802676200866699, "reward_std": 0.5019233822822571, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.5297186970710754, "rewards/format_reward_step": 0.89453125, "rewards/stepwise_brier_reward": 0.6381955146789551, "step": 50 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5470850169044282, "calib/avg_num_step_conf": 3.0625, "calib/ece": 0.37765926075268813, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.08064516129032258, "calib/gap": 0.05392048514334402, "calib/mean_conf": 0.6316915188172042, "calib/mu_c": 0.6710447761194029, "calib/mu_w": 0.6171242909760589, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.3695947446236559, "calib/std_conf": 0.26770725785027366, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2372.0, "completions/max_terminated_length": 2372.0, "completions/mean_length": 450.21875, "completions/mean_terminated_length": 450.21875, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.0544, "grad_norm": 0.005892525427043438, "learning_rate": 4.138888888888889e-06, "loss": 0.018, "num_tokens": 11992241.0, "reward": 0.9921143054962158, "reward_std": 0.4867195188999176, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.5696330666542053, "rewards/format_reward_step": 0.91796875, "rewards/stepwise_brier_reward": 0.6495035886764526, "step": 51 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5020485345099275, "calib/avg_num_step_conf": 2.91015625, "calib/ece": 0.3590770941426612, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.09876543209876543, "calib/gap": 0.001355779917007971, "calib/mean_conf": 0.5949893026474623, "calib/mu_c": 0.5959210526315789, "calib/mu_w": 0.5945652727145709, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.3206545975720165, "calib/std_conf": 0.2748466045476414, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2351.0, "completions/max_terminated_length": 2351.0, "completions/mean_length": 460.1640625, "completions/mean_terminated_length": 461.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.055466666666666664, "grad_norm": 0.0061750151216983795, "learning_rate": 4.111111111111111e-06, "loss": 0.0662, "num_tokens": 12193675.0, "reward": 1.0142697095870972, "reward_std": 0.5381784439086914, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.5638716220855713, "rewards/format_reward_step": 0.89453125, "rewards/stepwise_brier_reward": 0.6527732610702515, "step": 52 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5710921401236385, "calib/avg_num_step_conf": 2.70703125, "calib/ece": 0.295949535192563, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.04780876494023904, "calib/gap": 0.06633402021391421, "calib/mean_conf": 0.5518857901726427, "calib/mu_c": 0.5973417721518987, "calib/mu_w": 0.5310077519379844, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.266547144754316, "calib/std_conf": 0.2802968818600421, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 441.8515625, "completions/mean_terminated_length": 443.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.05653333333333333, "grad_norm": 0.0061105904169380665, "learning_rate": 4.083333333333334e-06, "loss": -0.0193, "num_tokens": 12388293.0, "reward": 1.1016957759857178, "reward_std": 0.4669555127620697, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.648909330368042, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7152143120765686, "step": 53 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46703296703296704, "calib/avg_num_step_conf": 2.9453125, "calib/ece": 0.3181579301075268, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.03225806451612903, "calib/gap": -0.03299165908401591, "calib/mean_conf": 0.5291001344086022, "calib/mu_c": 0.5082142857142856, "calib/mu_w": 0.5412059447983015, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.2401612903225806, "calib/std_conf": 0.28768419210339424, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1200.0, "completions/max_terminated_length": 1200.0, "completions/mean_length": 398.16015625, "completions/mean_terminated_length": 399.7215881347656, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0576, "grad_norm": 0.007216614205390215, "learning_rate": 4.055555555555556e-06, "loss": 0.0037, "num_tokens": 12572134.0, "reward": 1.1137436628341675, "reward_std": 0.5061899423599243, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.5936059951782227, "rewards/format_reward_step": 0.921875, "rewards/stepwise_brier_reward": 0.7083876132965088, "step": 54 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5846567411083541, "calib/avg_num_step_conf": 2.75, "calib/ece": 0.267946015936255, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": 0.07323633581472294, "calib/mean_conf": 0.4770838645418327, "calib/mu_c": 0.5313546153846155, "calib/mu_w": 0.45811827956989254, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.24303286852589645, "calib/std_conf": 0.2663581623790452, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2425.0, "completions/max_terminated_length": 2425.0, "completions/mean_length": 447.69921875, "completions/mean_terminated_length": 447.69921875, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.058666666666666666, "grad_norm": 0.005838231183588505, "learning_rate": 4.027777777777779e-06, "loss": 0.0461, "num_tokens": 12770249.0, "reward": 1.063176155090332, "reward_std": 0.416034460067749, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.6880024671554565, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7173251509666443, "step": 55 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4855025773195877, "calib/avg_num_step_conf": 2.75, "calib/ece": 0.27800292456666675, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.016, "calib/gap": -0.0017181261720667362, "calib/mean_conf": 0.4046904087666667, "calib/mu_c": 0.40335714285714286, "calib/mu_w": 0.4050752690292096, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.22934666666666673, "calib/std_conf": 0.2744626342017327, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 463.6484375, "completions/mean_terminated_length": 463.6484375, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.05973333333333333, "grad_norm": 0.0065548000857234, "learning_rate": 4.000000000000001e-06, "loss": 0.0268, "num_tokens": 12971463.0, "reward": 1.0270369052886963, "reward_std": 0.3856312036514282, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.6830077767372131, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.7233819961547852, "step": 56 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5052573529411765, "calib/avg_num_step_conf": 2.5703125, "calib/ece": 0.23630473130666665, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.012, "calib/gap": 0.002304751921568593, "calib/mean_conf": 0.36149526869333337, "calib/mu_c": 0.36306249999999995, "calib/mu_w": 0.36075774807843136, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.13890000000000002, "calib/std_conf": 0.2615626427706827, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2112.0, "completions/max_terminated_length": 2112.0, "completions/mean_length": 439.7578125, "completions/mean_terminated_length": 439.7578125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.0608, "grad_norm": 0.0059964158572256565, "learning_rate": 3.972222222222223e-06, "loss": 0.0257, "num_tokens": 13166513.0, "reward": 1.1208088397979736, "reward_std": 0.4135684370994568, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6733556389808655, "rewards/format_reward_step": 0.9453125, "rewards/stepwise_brier_reward": 0.7521493434906006, "step": 57 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5710806697108066, "calib/avg_num_step_conf": 2.44921875, "calib/ece": 0.16502314624505926, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.06396107214611874, "calib/mean_conf": 0.3664393043478261, "calib/mu_c": 0.41194520547945207, "calib/mu_w": 0.34798413333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.12146245059288535, "calib/std_conf": 0.24582607562520056, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2526.0, "completions/max_terminated_length": 2526.0, "completions/mean_length": 470.84375, "completions/mean_terminated_length": 470.84375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.06186666666666667, "grad_norm": 0.00580226257443428, "learning_rate": 3.944444444444445e-06, "loss": 0.0212, "num_tokens": 13369049.0, "reward": 1.1401081085205078, "reward_std": 0.36763840913772583, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7382141351699829, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7668165564537048, "step": 58 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49960278053624635, "calib/avg_num_step_conf": 2.328125, "calib/ece": 0.24439829396325458, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.014792794880282567, "calib/mean_conf": 0.38057152230971125, "calib/mu_c": 0.3898315789473686, "calib/mu_w": 0.375038784067086, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1254770341207349, "calib/std_conf": 0.2619453349670135, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1851.0, "completions/max_terminated_length": 1851.0, "completions/mean_length": 454.171875, "completions/mean_terminated_length": 454.171875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.06293333333333333, "grad_norm": 0.00570636847987771, "learning_rate": 3.916666666666667e-06, "loss": 0.0191, "num_tokens": 13567245.0, "reward": 1.2028120756149292, "reward_std": 0.38464105129241943, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6876335740089417, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7750437259674072, "step": 59 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5375184516975562, "calib/avg_num_step_conf": 2.109375, "calib/ece": 0.16830111807228917, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.03220580226340819, "calib/mean_conf": 0.3078032995983936, "calib/mu_c": 0.33134328358208953, "calib/mu_w": 0.29913748131868134, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.1035140562248996, "calib/std_conf": 0.22954419704442436, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2593.0, "completions/max_terminated_length": 2593.0, "completions/mean_length": 472.59765625, "completions/mean_terminated_length": 474.4510192871094, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.064, "grad_norm": 0.005720623768866062, "learning_rate": 3.88888888888889e-06, "loss": 0.0277, "num_tokens": 13772766.0, "reward": 1.1164627075195312, "reward_std": 0.35981249809265137, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7337855100631714, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7935928106307983, "step": 60 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.551148761391349, "calib/avg_num_step_conf": 2.51953125, "calib/ece": 0.2384769433465085, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.01993505326658962, "calib/mean_conf": 0.3057127799736496, "calib/mu_c": 0.3172955974842767, "calib/mu_w": 0.2973605442176871, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06260869565217392, "calib/std_conf": 0.23264621599619584, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1869.0, "completions/max_terminated_length": 1869.0, "completions/mean_length": 399.1875, "completions/mean_terminated_length": 400.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.06506666666666666, "grad_norm": 0.006268184166401625, "learning_rate": 3.861111111111112e-06, "loss": 0.0118, "num_tokens": 13954702.0, "reward": 1.254476547241211, "reward_std": 0.3372269570827484, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6911241412162781, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7934705018997192, "step": 61 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5436766623207302, "calib/avg_num_step_conf": 1.890625, "calib/ece": 0.18494039215686273, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006246849196001603, "calib/mean_conf": 0.25341254901960786, "calib/mu_c": 0.25384615384615383, "calib/mu_w": 0.25322146892655367, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06623529411764706, "calib/std_conf": 0.2003308169870002, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1528.0, "completions/max_terminated_length": 1528.0, "completions/mean_length": 439.51171875, "completions/mean_terminated_length": 441.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.06613333333333334, "grad_norm": 0.006458355579525232, "learning_rate": 3.833333333333334e-06, "loss": 0.0026, "num_tokens": 14149977.0, "reward": 1.1732728481292725, "reward_std": 0.33111098408699036, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7326979041099548, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8323830962181091, "step": 62 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49906191369606007, "calib/avg_num_step_conf": 1.890625, "calib/ece": 0.18821015936254984, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001516434550440121, "calib/mean_conf": 0.2428655378486056, "calib/mu_c": 0.241844512195122, "calib/mu_w": 0.24336094674556213, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05219123505976098, "calib/std_conf": 0.1834396838939196, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2843.0, "completions/max_terminated_length": 2843.0, "completions/mean_length": 458.81640625, "completions/mean_terminated_length": 458.81640625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.0672, "grad_norm": 0.006227673031389713, "learning_rate": 3.8055555555555556e-06, "loss": 0.04, "num_tokens": 14351754.0, "reward": 1.1826722621917725, "reward_std": 0.35445255041122437, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.7207086682319641, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8361467719078064, "step": 63 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5357548999933106, "calib/avg_num_step_conf": 1.9453125, "calib/ece": 0.21857719999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026542624924744185, "calib/mean_conf": 0.1894228, "calib/mu_c": 0.20545454545454553, "calib/mu_w": 0.17891192052980134, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.006000000000000001, "calib/std_conf": 0.15477011339454397, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2297.0, "completions/max_terminated_length": 2297.0, "completions/mean_length": 452.79296875, "completions/mean_terminated_length": 454.56866455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.06826666666666667, "grad_norm": 0.006078002508729696, "learning_rate": 3.777777777777778e-06, "loss": 0.0262, "num_tokens": 14547125.0, "reward": 1.216360330581665, "reward_std": 0.344184935092926, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.672036349773407, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8104315400123596, "step": 64 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5056730769230768, "calib/avg_num_step_conf": 2.03125, "calib/ece": 0.274, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.015825641025641074, "calib/mean_conf": 0.19084375000000003, "calib/mu_c": 0.1812, "calib/mu_w": 0.19702564102564107, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.037109375, "calib/std_conf": 0.150726933221762, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1287.0, "completions/max_terminated_length": 1287.0, "completions/mean_length": 382.0625, "completions/mean_terminated_length": 383.5608215332031, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.06933333333333333, "grad_norm": 0.006557927932590246, "learning_rate": 3.7500000000000005e-06, "loss": 0.0233, "num_tokens": 14725637.0, "reward": 1.2358492612838745, "reward_std": 0.24881812930107117, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6803131699562073, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.834333062171936, "step": 65 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.581155741376736, "calib/avg_num_step_conf": 1.6953125, "calib/ece": 0.15649803921568628, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03742593698671051, "calib/mean_conf": 0.16428627450980393, "calib/mu_c": 0.1908513513513514, "calib/mu_w": 0.15342541436464088, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.015294117647058828, "calib/std_conf": 0.1362584640574566, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1797.0, "completions/max_terminated_length": 1797.0, "completions/mean_length": 459.1484375, "completions/mean_terminated_length": 460.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.0704, "grad_norm": 0.006001452449709177, "learning_rate": 3.7222222222222225e-06, "loss": 0.023, "num_tokens": 14925211.0, "reward": 1.181628942489624, "reward_std": 0.22775799036026, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7677407264709473, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8488467931747437, "step": 66 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5434293429342935, "calib/avg_num_step_conf": 1.84375, "calib/ece": 0.26070901960784315, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.026889533239038255, "calib/mean_conf": 0.16203607843137255, "calib/mu_c": 0.17827524752475254, "calib/mu_w": 0.1513857142857143, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.013333333333333332, "calib/std_conf": 0.1439163709784321, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1576.0, "completions/max_terminated_length": 1576.0, "completions/mean_length": 422.50390625, "completions/mean_terminated_length": 424.1607971191406, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.07146666666666666, "grad_norm": 0.005936103407293558, "learning_rate": 3.694444444444445e-06, "loss": 0.0276, "num_tokens": 15114060.0, "reward": 1.252760410308838, "reward_std": 0.26562878489494324, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6947064399719238, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.852878749370575, "step": 67 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.583870101986044, "calib/avg_num_step_conf": 1.57421875, "calib/ece": 0.22806299212598427, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02360815888352119, "calib/mean_conf": 0.15146456692913388, "calib/mu_c": 0.16652173913043478, "calib/mu_w": 0.14291358024691359, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.008661417322834646, "calib/std_conf": 0.11778331869080748, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1164.0, "completions/max_terminated_length": 1164.0, "completions/mean_length": 406.69140625, "completions/mean_terminated_length": 408.2862854003906, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.07253333333333334, "grad_norm": 0.006685419473797083, "learning_rate": 3.6666666666666666e-06, "loss": 0.0119, "num_tokens": 15297941.0, "reward": 1.2206246852874756, "reward_std": 0.2626305818557739, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.7107387781143188, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8469581007957458, "step": 68 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5575284090909091, "calib/avg_num_step_conf": 1.46875, "calib/ece": 0.17670634920634926, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021387559808612455, "calib/mean_conf": 0.13440476190476192, "calib/mu_c": 0.14934210526315791, "calib/mu_w": 0.12795454545454546, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.004761904761904762, "calib/std_conf": 0.08806136736258817, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2975.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 441.32421875, "completions/mean_terminated_length": 443.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.0736, "grad_norm": 0.005892947781831026, "learning_rate": 3.638888888888889e-06, "loss": 0.0477, "num_tokens": 15491096.0, "reward": 1.16652512550354, "reward_std": 0.2544803023338318, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7387734651565552, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8385534882545471, "step": 69 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5252142857142857, "calib/avg_num_step_conf": 1.4140625, "calib/ece": 0.19878039215686275, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01270500000000005, "calib/mean_conf": 0.13690588235294118, "calib/mu_c": 0.14562500000000003, "calib/mu_w": 0.13291999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010980392156862747, "calib/std_conf": 0.10751707098704472, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1628.0, "completions/max_terminated_length": 1628.0, "completions/mean_length": 419.92578125, "completions/mean_terminated_length": 421.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.07466666666666667, "grad_norm": 0.006532968487590551, "learning_rate": 3.6111111111111115e-06, "loss": 0.0464, "num_tokens": 15681269.0, "reward": 1.207397699356079, "reward_std": 0.19588616490364075, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.74442458152771, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8798038959503174, "step": 70 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6059593428014481, "calib/avg_num_step_conf": 1.43359375, "calib/ece": 0.21764705882352942, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025062656641603995, "calib/mean_conf": 0.11176470588235295, "calib/mu_c": 0.12857142857142856, "calib/mu_w": 0.10350877192982456, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05600790848754119, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2506.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 443.25390625, "completions/mean_terminated_length": 443.25390625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.07573333333333333, "grad_norm": 0.006109386682510376, "learning_rate": 3.5833333333333335e-06, "loss": 0.0309, "num_tokens": 15874830.0, "reward": 1.207634687423706, "reward_std": 0.24222128093242645, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.7290421724319458, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8740170001983643, "step": 71 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5144973544973545, "calib/avg_num_step_conf": 1.3046875, "calib/ece": 0.21828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0014476190476190726, "calib/mean_conf": 0.11765625000000002, "calib/mu_c": 0.11666666666666665, "calib/mu_w": 0.11811428571428573, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009765624999999997, "calib/std_conf": 0.07070498098392715, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1094.0, "completions/max_terminated_length": 1094.0, "completions/mean_length": 368.89453125, "completions/mean_terminated_length": 370.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.0768, "grad_norm": 0.006662310566753149, "learning_rate": 3.555555555555556e-06, "loss": -0.0097, "num_tokens": 16049355.0, "reward": 1.205643892288208, "reward_std": 0.22846034169197083, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.7347124814987183, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8922134637832642, "step": 72 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5062370062370062, "calib/avg_num_step_conf": 1.1796875, "calib/ece": 0.32673228346456684, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012770112770112696, "calib/mean_conf": 0.1165748031496063, "calib/mu_c": 0.11585585585585587, "calib/mu_w": 0.11713286713286714, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0031496062992125966, "calib/std_conf": 0.06897220936240427, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 945.0, "completions/max_terminated_length": 945.0, "completions/mean_length": 378.60546875, "completions/mean_terminated_length": 380.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.07786666666666667, "grad_norm": 0.006181009113788605, "learning_rate": 3.5277777777777784e-06, "loss": -0.0322, "num_tokens": 16228990.0, "reward": 1.2618048191070557, "reward_std": 0.19632968306541443, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6369625329971313, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8529819250106812, "step": 73 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5107733175914994, "calib/avg_num_step_conf": 1.2890625, "calib/ece": 0.1924466403162055, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003162337662337658, "calib/mean_conf": 0.11664426877470357, "calib/mu_c": 0.11884415584415584, "calib/mu_w": 0.11568181818181818, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0023715415019762843, "calib/std_conf": 0.06966356145946241, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1589.0, "completions/max_terminated_length": 1589.0, "completions/mean_length": 369.80859375, "completions/mean_terminated_length": 371.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.07893333333333333, "grad_norm": 0.006986550521105528, "learning_rate": 3.5e-06, "loss": -0.0384, "num_tokens": 16403269.0, "reward": 1.1860867738723755, "reward_std": 0.19439929723739624, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7368823885917664, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8705822229385376, "step": 74 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5315934065934066, "calib/avg_num_step_conf": 1.39453125, "calib/ece": 0.39097656249999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0032564102564102693, "calib/mean_conf": 0.10667968750000001, "calib/mu_c": 0.10833333333333335, "calib/mu_w": 0.10507692307692308, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002734375, "calib/std_conf": 0.05525783790470221, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1048.0, "completions/max_terminated_length": 1048.0, "completions/mean_length": 355.7109375, "completions/mean_terminated_length": 357.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.08, "grad_norm": 0.006790259387344122, "learning_rate": 3.4722222222222224e-06, "loss": 0.0141, "num_tokens": 16574763.0, "reward": 1.2971038818359375, "reward_std": 0.21821537613868713, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.600019097328186, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8196269869804382, "step": 75 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.490164039471998, "calib/avg_num_step_conf": 1.28125, "calib/ece": 0.29662745098039217, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012091503267973869, "calib/mean_conf": 0.10337254901960784, "calib/mu_c": 0.1026470588235294, "calib/mu_w": 0.10385620915032678, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.04954299411674186, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2006.0, "completions/max_terminated_length": 2006.0, "completions/mean_length": 362.609375, "completions/mean_terminated_length": 362.609375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.08106666666666666, "grad_norm": 0.007501825224608183, "learning_rate": 3.444444444444445e-06, "loss": 0.0288, "num_tokens": 16746327.0, "reward": 1.2401504516601562, "reward_std": 0.22100690007209778, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6580047011375427, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8695923089981079, "step": 76 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5179235179235179, "calib/avg_num_step_conf": 1.2109375, "calib/ece": 0.3316929133858268, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013286713286713436, "calib/mean_conf": 0.10925196850393701, "calib/mu_c": 0.11000000000000001, "calib/mu_w": 0.10867132867132867, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.001968503937007874, "calib/std_conf": 0.06096849173396016, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2688.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 406.18359375, "completions/mean_terminated_length": 406.18359375, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.08213333333333334, "grad_norm": 0.006342964246869087, "learning_rate": 3.416666666666667e-06, "loss": 0.098, "num_tokens": 16930654.0, "reward": 1.2683703899383545, "reward_std": 0.24026376008987427, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.638453483581543, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8715747594833374, "step": 77 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5432713945862534, "calib/avg_num_step_conf": 1.375, "calib/ece": 0.3044094488188976, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009740886002700455, "calib/mean_conf": 0.1011023622047244, "calib/mu_c": 0.10689320388349513, "calib/mu_w": 0.09715231788079468, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.046528770990141785, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 412.46484375, "completions/mean_terminated_length": 414.0823669433594, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.0832, "grad_norm": 0.007117137312889099, "learning_rate": 3.3888888888888893e-06, "loss": 0.0219, "num_tokens": 17119949.0, "reward": 1.2550793886184692, "reward_std": 0.21007688343524933, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6635695695877075, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8775535821914673, "step": 78 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.525512127686314, "calib/avg_num_step_conf": 1.26953125, "calib/ece": 0.3317254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020114364710318022, "calib/mean_conf": 0.10435294117647059, "calib/mu_c": 0.10550458715596331, "calib/mu_w": 0.1034931506849315, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.004313725490196078, "calib/std_conf": 0.04398164658636054, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2057.0, "completions/max_terminated_length": 2057.0, "completions/mean_length": 450.1015625, "completions/mean_terminated_length": 450.1015625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.08426666666666667, "grad_norm": 0.006018451880663633, "learning_rate": 3.3611111111111117e-06, "loss": 0.0178, "num_tokens": 17317231.0, "reward": 1.2497444152832031, "reward_std": 0.20812439918518066, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6396187543869019, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8306777477264404, "step": 79 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5166295057599405, "calib/avg_num_step_conf": 1.50390625, "calib/ece": 0.35180392156862744, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006296915644741746, "calib/mean_conf": 0.10701960784313726, "calib/mu_c": 0.11042735042735043, "calib/mu_w": 0.10413043478260868, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.052121084588386525, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1906.0, "completions/max_terminated_length": 1906.0, "completions/mean_length": 383.921875, "completions/mean_terminated_length": 385.4274597167969, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.08533333333333333, "grad_norm": 0.008160736411809921, "learning_rate": 3.3333333333333333e-06, "loss": -0.0023, "num_tokens": 17493355.0, "reward": 1.2862913608551025, "reward_std": 0.21927902102470398, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6258856058120728, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8543317317962646, "step": 80 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.560911150264284, "calib/avg_num_step_conf": 1.3359375, "calib/ece": 0.34845454545454535, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012496476214447536, "calib/mean_conf": 0.11004347826086958, "calib/mu_c": 0.11681034482758622, "calib/mu_w": 0.10431386861313868, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.046031460359654164, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2509.0, "completions/max_terminated_length": 2509.0, "completions/mean_length": 449.65234375, "completions/mean_terminated_length": 449.65234375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.0864, "grad_norm": 0.006808146368712187, "learning_rate": 3.3055555555555558e-06, "loss": 0.0815, "num_tokens": 17690394.0, "reward": 1.2709615230560303, "reward_std": 0.2466251254081726, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6269539594650269, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8315008878707886, "step": 81 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5553890384144848, "calib/avg_num_step_conf": 1.23046875, "calib/ece": 0.36363281250000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013037680450208006, "calib/mean_conf": 0.11292968750000004, "calib/mu_c": 0.11975409836065574, "calib/mu_w": 0.10671641791044774, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.043763512840634074, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 380.171875, "completions/mean_terminated_length": 381.66278076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.08746666666666666, "grad_norm": 0.007882577367126942, "learning_rate": 3.277777777777778e-06, "loss": 0.0379, "num_tokens": 17868950.0, "reward": 1.2923544645309448, "reward_std": 0.18476907908916473, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6229097843170166, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8173484206199646, "step": 82 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.549424005945745, "calib/avg_num_step_conf": 1.2265625, "calib/ece": 0.3451960784313725, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007748049052396899, "calib/mean_conf": 0.11362745098039216, "calib/mu_c": 0.11782051282051284, "calib/mu_w": 0.11007246376811594, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04847354677181931, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 441.8515625, "completions/mean_terminated_length": 443.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.08853333333333334, "grad_norm": 0.00784524530172348, "learning_rate": 3.2500000000000002e-06, "loss": 0.0501, "num_tokens": 18065008.0, "reward": 1.2880065441131592, "reward_std": 0.18693551421165466, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.627806544303894, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8620378971099854, "step": 83 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5292688925782452, "calib/avg_num_step_conf": 1.23046875, "calib/ece": 0.33453125000000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008844616614400821, "calib/mean_conf": 0.12562500000000001, "calib/mu_c": 0.13042735042735046, "calib/mu_w": 0.12158273381294964, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0015625, "calib/std_conf": 0.05904513633653496, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1096.0, "completions/max_terminated_length": 1096.0, "completions/mean_length": 392.98046875, "completions/mean_terminated_length": 394.5216064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.0896, "grad_norm": 0.00771920895203948, "learning_rate": 3.2222222222222227e-06, "loss": 0.0447, "num_tokens": 18247211.0, "reward": 1.292407512664795, "reward_std": 0.20232713222503662, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6421773433685303, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8618377447128296, "step": 84 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5650152827305146, "calib/avg_num_step_conf": 1.30859375, "calib/ece": 0.2864901960784314, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016713576158940466, "calib/mean_conf": 0.1213529411764706, "calib/mu_c": 0.13125000000000003, "calib/mu_w": 0.11453642384105957, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.051280016950156174, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 433.58984375, "completions/mean_terminated_length": 433.58984375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.09066666666666667, "grad_norm": 0.006601530127227306, "learning_rate": 3.1944444444444443e-06, "loss": 0.0578, "num_tokens": 18441714.0, "reward": 1.26658296585083, "reward_std": 0.20831358432769775, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6791960000991821, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8876274824142456, "step": 85 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5501410971161126, "calib/avg_num_step_conf": 1.11328125, "calib/ece": 0.21464566929133858, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014422190102553495, "calib/mean_conf": 0.12787401574803148, "calib/mu_c": 0.13735632183908045, "calib/mu_w": 0.12293413173652695, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05324331197155695, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 426.640625, "completions/mean_terminated_length": 426.640625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.09173333333333333, "grad_norm": 0.007412768434733152, "learning_rate": 3.1666666666666667e-06, "loss": 0.0749, "num_tokens": 18632126.0, "reward": 1.212226390838623, "reward_std": 0.19229844212532043, "rewards/accuracy_reward_step": 0.33984375, "rewards/final_brier_reward_step": 0.7252601385116577, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8530725240707397, "step": 86 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5480605487228003, "calib/avg_num_step_conf": 1.13671875, "calib/ece": 0.4560546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012075055187637934, "calib/mean_conf": 0.13378906250000003, "calib/mu_c": 0.1387417218543046, "calib/mu_w": 0.12666666666666668, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05555911158730937, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1123.0, "completions/max_terminated_length": 1123.0, "completions/mean_length": 374.06640625, "completions/mean_terminated_length": 375.5333557128906, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.0928, "grad_norm": 0.01204943098127842, "learning_rate": 3.138888888888889e-06, "loss": 0.0105, "num_tokens": 18809063.0, "reward": 1.3697707653045654, "reward_std": 0.20046617090702057, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5483496189117432, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8323837518692017, "step": 87 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5213235294117646, "calib/avg_num_step_conf": 1.19921875, "calib/ece": 0.3510546875000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005906862745098079, "calib/mean_conf": 0.11769531250000004, "calib/mu_c": 0.12083333333333336, "calib/mu_w": 0.11492647058823528, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04484087466282681, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2034.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 450.77734375, "completions/mean_terminated_length": 452.5451354980469, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.09386666666666667, "grad_norm": 0.008023022674024105, "learning_rate": 3.1111111111111116e-06, "loss": 0.0397, "num_tokens": 19009990.0, "reward": 1.2934889793395996, "reward_std": 0.22242587804794312, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6248011589050293, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8540412187576294, "step": 88 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5638919995023017, "calib/avg_num_step_conf": 1.046875, "calib/ece": 0.3166274509803922, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012545726017170605, "calib/mean_conf": 0.13043137254901963, "calib/mu_c": 0.13736842105263158, "calib/mu_w": 0.12482269503546098, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05720457094788664, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1840.0, "completions/max_terminated_length": 1840.0, "completions/mean_length": 425.8515625, "completions/mean_terminated_length": 427.5216064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.09493333333333333, "grad_norm": 0.006762202363461256, "learning_rate": 3.0833333333333336e-06, "loss": -0.0273, "num_tokens": 19203576.0, "reward": 1.286118984222412, "reward_std": 0.17611607909202576, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6529195308685303, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8620744347572327, "step": 89 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5667138287401574, "calib/avg_num_step_conf": 1.1015625, "calib/ece": 0.37058823529411766, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01433316929133864, "calib/mean_conf": 0.12745098039215685, "calib/mu_c": 0.13464566929133867, "calib/mu_w": 0.12031250000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05469498802198522, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1697.0, "completions/max_terminated_length": 1697.0, "completions/mean_length": 405.1875, "completions/mean_terminated_length": 406.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.096, "grad_norm": 0.008788934908807278, "learning_rate": 3.055555555555556e-06, "loss": 0.0517, "num_tokens": 19386304.0, "reward": 1.3127435445785522, "reward_std": 0.2274431735277176, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6106835603713989, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8546072244644165, "step": 90 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.536080586080586, "calib/avg_num_step_conf": 1.12109375, "calib/ece": 0.364451171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00711459096459105, "calib/mean_conf": 0.12773632812500002, "calib/mu_c": 0.1313492063492064, "calib/mu_w": 0.12423461538461536, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05600496811627371, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 422.421875, "completions/mean_terminated_length": 424.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.09706666666666666, "grad_norm": 0.006352297496050596, "learning_rate": 3.0277777777777776e-06, "loss": -0.0096, "num_tokens": 19577836.0, "reward": 1.3052550554275513, "reward_std": 0.21970926225185394, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6176562309265137, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8169575929641724, "step": 91 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5578652377432864, "calib/avg_num_step_conf": 1.06640625, "calib/ece": 0.35862745098039217, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013063562453806396, "calib/mean_conf": 0.12372549019607845, "calib/mu_c": 0.1304878048780488, "calib/mu_w": 0.11742424242424242, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.051978909158510896, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1062.0, "completions/max_terminated_length": 1062.0, "completions/mean_length": 390.27734375, "completions/mean_terminated_length": 391.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.09813333333333334, "grad_norm": 0.007925019599497318, "learning_rate": 3e-06, "loss": 0.0486, "num_tokens": 19760147.0, "reward": 1.3043065071105957, "reward_std": 0.22197169065475464, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6230762004852295, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8538861870765686, "step": 92 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5310823311748382, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.3203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005932778291705243, "calib/mean_conf": 0.12890625000000003, "calib/mu_c": 0.13217391304347828, "calib/mu_w": 0.12624113475177304, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.051390575117792756, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 398.0625, "completions/mean_terminated_length": 399.6235656738281, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.0992, "grad_norm": 0.013263871893286705, "learning_rate": 2.9722222222222225e-06, "loss": 0.0339, "num_tokens": 19943507.0, "reward": 1.2797186374664307, "reward_std": 0.2574073076248169, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6425390243530273, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8462965488433838, "step": 93 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5782787893700787, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.3829411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02063545767716539, "calib/mean_conf": 0.11901960784313724, "calib/mu_c": 0.12929687500000003, "calib/mu_w": 0.10866141732283464, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05314629834049774, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1234.0, "completions/max_terminated_length": 1234.0, "completions/mean_length": 383.265625, "completions/mean_terminated_length": 384.7686462402344, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.10026666666666667, "grad_norm": 0.010008939541876316, "learning_rate": 2.944444444444445e-06, "loss": -0.0112, "num_tokens": 20125983.0, "reward": 1.3118412494659424, "reward_std": 0.1737186312675476, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6084668636322021, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8351192474365234, "step": 94 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5236263736263738, "calib/avg_num_step_conf": 1.07421875, "calib/ece": 0.384375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005518925518925508, "calib/mean_conf": 0.12343750000000002, "calib/mu_c": 0.12615384615384614, "calib/mu_w": 0.12063492063492064, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.050751193027849105, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1607.0, "completions/max_terminated_length": 1607.0, "completions/mean_length": 394.0546875, "completions/mean_terminated_length": 395.60003662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.10133333333333333, "grad_norm": 0.009900784119963646, "learning_rate": 2.916666666666667e-06, "loss": 0.0047, "num_tokens": 20308669.0, "reward": 1.3128812313079834, "reward_std": 0.20290324091911316, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6024999618530273, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8152750730514526, "step": 95 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5637755102040818, "calib/avg_num_step_conf": 1.0390625, "calib/ece": 0.4892578125000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017973391888400927, "calib/mean_conf": 0.1279296875, "calib/mu_c": 0.13481012658227848, "calib/mu_w": 0.11683673469387755, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.056173654244248195, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 965.0, "completions/max_terminated_length": 965.0, "completions/mean_length": 363.50390625, "completions/mean_terminated_length": 364.929443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.1024, "grad_norm": 0.01069644931703806, "learning_rate": 2.888888888888889e-06, "loss": 0.0379, "num_tokens": 20483222.0, "reward": 1.3792392015457153, "reward_std": 0.1949031949043274, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.5296972990036011, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7888127565383911, "step": 96 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6126050420168067, "calib/avg_num_step_conf": 1.046875, "calib/ece": 0.40688976377952757, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02732337379396202, "calib/mean_conf": 0.12460629921259843, "calib/mu_c": 0.1374074074074074, "calib/mu_w": 0.11008403361344538, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05797469682655482, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1152.0, "completions/max_terminated_length": 1152.0, "completions/mean_length": 379.9140625, "completions/mean_terminated_length": 381.4039306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.10346666666666667, "grad_norm": 0.011838745325803757, "learning_rate": 2.861111111111111e-06, "loss": 0.0095, "num_tokens": 20661232.0, "reward": 1.335360050201416, "reward_std": 0.21025191247463226, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5910254120826721, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8437644243240356, "step": 97 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.500244140625, "calib/avg_num_step_conf": 1.03515625, "calib/ece": 0.36464843750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011718750000000167, "calib/mean_conf": 0.13535156250000002, "calib/mu_c": 0.13476562500000003, "calib/mu_w": 0.13593750000000004, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05986861785450366, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1440.0, "completions/max_terminated_length": 1440.0, "completions/mean_length": 402.171875, "completions/mean_terminated_length": 403.7490539550781, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.10453333333333334, "grad_norm": 0.013612802140414715, "learning_rate": 2.8333333333333335e-06, "loss": 0.0005, "num_tokens": 20846052.0, "reward": 1.315796136856079, "reward_std": 0.21005511283874512, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6128613352775574, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8374618887901306, "step": 98 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5737812911725955, "calib/avg_num_step_conf": 1.08203125, "calib/ece": 0.13203125000000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016724792683871947, "calib/mean_conf": 0.14140625000000004, "calib/mu_c": 0.15362318840579708, "calib/mu_w": 0.13689839572192514, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001953125, "calib/std_conf": 0.06676374361086637, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 460.77734375, "completions/mean_terminated_length": 462.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.1056, "grad_norm": 0.007334718015044928, "learning_rate": 2.805555555555556e-06, "loss": -0.0092, "num_tokens": 21045491.0, "reward": 1.188649296760559, "reward_std": 0.18395203351974487, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7888281345367432, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8988158702850342, "step": 99 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.571071071071071, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.29254901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017098348348348447, "calib/mean_conf": 0.14980392156862743, "calib/mu_c": 0.15945945945945955, "calib/mu_w": 0.1423611111111111, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0035294117647058825, "calib/std_conf": 0.07468863987111943, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1628.0, "completions/max_terminated_length": 1628.0, "completions/mean_length": 432.60546875, "completions/mean_terminated_length": 434.3019714355469, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.10666666666666667, "grad_norm": 0.011174692772328854, "learning_rate": 2.7777777777777783e-06, "loss": -0.0189, "num_tokens": 21239326.0, "reward": 1.2878634929656982, "reward_std": 0.20618480443954468, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.669003963470459, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8728216886520386, "step": 100 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6354033506021664, "calib/avg_num_step_conf": 1.0625, "calib/ece": 0.20195312500000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03624436520217991, "calib/mean_conf": 0.14882812500000003, "calib/mu_c": 0.17247191011235954, "calib/mu_w": 0.13622754491017963, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0015625, "calib/std_conf": 0.07420538530985722, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1819.0, "completions/max_terminated_length": 1819.0, "completions/mean_length": 445.125, "completions/mean_terminated_length": 446.87060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.10773333333333333, "grad_norm": 0.008462260477244854, "learning_rate": 2.7500000000000004e-06, "loss": -0.0132, "num_tokens": 21435950.0, "reward": 1.2457029819488525, "reward_std": 0.18622824549674988, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.7408593893051147, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9151560068130493, "step": 101 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6699993570372276, "calib/avg_num_step_conf": 1.03515625, "calib/ece": 0.4326771653543307, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04813862277374137, "calib/mean_conf": 0.1625984251968504, "calib/mu_c": 0.1821192052980132, "calib/mu_w": 0.13398058252427184, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0003937007874015748, "calib/std_conf": 0.07343337650497607, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1068.0, "completions/max_terminated_length": 1068.0, "completions/mean_length": 357.359375, "completions/mean_terminated_length": 358.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.1088, "grad_norm": 0.009046705439686775, "learning_rate": 2.7222222222222224e-06, "loss": 0.0208, "num_tokens": 21609810.0, "reward": 1.3842028379440308, "reward_std": 0.21628153324127197, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.581113338470459, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8339598178863525, "step": 102 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6123626373626374, "calib/avg_num_step_conf": 1.03515625, "calib/ece": 0.3538671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.031056166056165985, "calib/mean_conf": 0.15394531250000004, "calib/mu_c": 0.16923076923076924, "calib/mu_w": 0.13817460317460326, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.07148304088577476, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1770.0, "completions/max_terminated_length": 1770.0, "completions/mean_length": 467.49609375, "completions/mean_terminated_length": 469.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.10986666666666667, "grad_norm": 0.016644254326820374, "learning_rate": 2.6944444444444444e-06, "loss": 0.0239, "num_tokens": 21809721.0, "reward": 1.3374496698379517, "reward_std": 0.20862965285778046, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6345113515853882, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8542134761810303, "step": 103 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6197704321645863, "calib/avg_num_step_conf": 1.15625, "calib/ece": 0.2625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03395847707457819, "calib/mean_conf": 0.15546875, "calib/mu_c": 0.17523364485981308, "calib/mu_w": 0.14127516778523488, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07294967630796931, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1040.0, "completions/max_terminated_length": 1040.0, "completions/mean_length": 410.25390625, "completions/mean_terminated_length": 411.8627624511719, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.11093333333333333, "grad_norm": 0.010387678630650043, "learning_rate": 2.666666666666667e-06, "loss": 0.0168, "num_tokens": 21997106.0, "reward": 1.29205322265625, "reward_std": 0.17050066590309143, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6990234851837158, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.898290753364563, "step": 104 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6384274640088594, "calib/avg_num_step_conf": 1.16015625, "calib/ece": 0.3392156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.039147286821705346, "calib/mean_conf": 0.16980392156862745, "calib/mu_c": 0.18914728682170542, "calib/mu_w": 0.15000000000000008, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0015686274509803923, "calib/std_conf": 0.07865262909180258, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2895.0, "completions/max_terminated_length": 2895.0, "completions/mean_length": 446.64453125, "completions/mean_terminated_length": 446.64453125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.112, "grad_norm": 0.018622733652591705, "learning_rate": 2.6388888888888893e-06, "loss": 0.0452, "num_tokens": 22192887.0, "reward": 1.342691421508789, "reward_std": 0.25465911626815796, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6479296684265137, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8639689683914185, "step": 105 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6150293613897725, "calib/avg_num_step_conf": 1.109375, "calib/ece": 0.29121093750000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03357597259603623, "calib/mean_conf": 0.1865234375, "calib/mu_c": 0.20409836065573772, "calib/mu_w": 0.1705223880597015, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0005859375000000001, "calib/std_conf": 0.08006359433714923, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 400.0078125, "completions/mean_terminated_length": 401.5765075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.11306666666666666, "grad_norm": 0.011190079152584076, "learning_rate": 2.6111111111111113e-06, "loss": -0.0159, "num_tokens": 22375553.0, "reward": 1.330780029296875, "reward_std": 0.21178483963012695, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6729004383087158, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8757568597793579, "step": 106 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5785692260252853, "calib/avg_num_step_conf": 1.125, "calib/ece": 0.3541015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023179155103299348, "calib/mean_conf": 0.19667968750000003, "calib/mu_c": 0.20709219858156028, "calib/mu_w": 0.18391304347826093, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07954160012158631, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1371.0, "completions/max_terminated_length": 1371.0, "completions/mean_length": 409.85546875, "completions/mean_terminated_length": 411.4627685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.11413333333333334, "grad_norm": 0.01202862337231636, "learning_rate": 2.5833333333333337e-06, "loss": 0.016, "num_tokens": 22560772.0, "reward": 1.383345365524292, "reward_std": 0.2616827189922333, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6323339939117432, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8655886650085449, "step": 107 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5877176558696874, "calib/avg_num_step_conf": 1.10546875, "calib/ece": 0.3658203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027051738126443153, "calib/mean_conf": 0.2095703125, "calib/mu_c": 0.22108843537414966, "calib/mu_w": 0.1940366972477065, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005859375000000001, "calib/std_conf": 0.08038123844313635, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1279.0, "completions/max_terminated_length": 1279.0, "completions/mean_length": 417.55859375, "completions/mean_terminated_length": 419.19610595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.1152, "grad_norm": 0.0160062238574028, "learning_rate": 2.5555555555555557e-06, "loss": 0.0162, "num_tokens": 22746579.0, "reward": 1.4096243381500244, "reward_std": 0.22388026118278503, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6293066143989563, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8830093741416931, "step": 108 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6354875283446711, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.20647058823529413, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04014550264550262, "calib/mean_conf": 0.22176470588235292, "calib/mu_c": 0.2449074074074074, "calib/mu_w": 0.20476190476190478, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0023529411764705885, "calib/std_conf": 0.08658455874547602, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1906.0, "completions/max_terminated_length": 1906.0, "completions/mean_length": 421.09375, "completions/mean_terminated_length": 422.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.11626666666666667, "grad_norm": 0.011149784550070763, "learning_rate": 2.5277777777777778e-06, "loss": -0.0142, "num_tokens": 22934659.0, "reward": 1.3087148666381836, "reward_std": 0.18745285272598267, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.7244043350219727, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9032385945320129, "step": 109 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5625038544557508, "calib/avg_num_step_conf": 1.203125, "calib/ece": 0.21113281250000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01761640456367558, "calib/mean_conf": 0.24160156249999998, "calib/mu_c": 0.25130434782608696, "calib/mu_w": 0.23368794326241138, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0017578125, "calib/std_conf": 0.07124570599382529, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 386.8984375, "completions/mean_terminated_length": 388.41571044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.11733333333333333, "grad_norm": 0.019615396857261658, "learning_rate": 2.5e-06, "loss": 0.0011, "num_tokens": 23114305.0, "reward": 1.3225723505020142, "reward_std": 0.24186158180236816, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.7060058116912842, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8907778263092041, "step": 110 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6133149678604224, "calib/avg_num_step_conf": 1.078125, "calib/ece": 0.22578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.031074380165289184, "calib/mean_conf": 0.25468749999999996, "calib/mu_c": 0.27107438016528923, "calib/mu_w": 0.24000000000000005, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00390625, "calib/std_conf": 0.07588990277863057, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1702.0, "completions/max_terminated_length": 1702.0, "completions/mean_length": 398.65234375, "completions/mean_terminated_length": 400.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.1184, "grad_norm": 0.020358692854642868, "learning_rate": 2.4722222222222226e-06, "loss": -0.0176, "num_tokens": 23299448.0, "reward": 1.3487699031829834, "reward_std": 0.26086199283599854, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7109765410423279, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.887189507484436, "step": 111 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6372567482736975, "calib/avg_num_step_conf": 1.07421875, "calib/ece": 0.21581027667984193, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03700564971751413, "calib/mean_conf": 0.25059288537549407, "calib/mu_c": 0.27033898305084747, "calib/mu_w": 0.23333333333333334, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.070357951734733, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2581.0, "completions/max_terminated_length": 2581.0, "completions/mean_length": 428.26171875, "completions/mean_terminated_length": 433.3399353027344, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.11946666666666667, "grad_norm": 0.013701686635613441, "learning_rate": 2.4444444444444447e-06, "loss": -0.0403, "num_tokens": 23492683.0, "reward": 1.3275620937347412, "reward_std": 0.25652819871902466, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.7076171636581421, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8700134754180908, "step": 112 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6255586164677074, "calib/avg_num_step_conf": 1.16015625, "calib/ece": 0.26171875000000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.030413223140495882, "calib/mean_conf": 0.26562499999999994, "calib/mu_c": 0.28, "calib/mu_w": 0.24958677685950414, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.06304450313072504, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1103.0, "completions/max_terminated_length": 1103.0, "completions/mean_length": 395.12890625, "completions/mean_terminated_length": 396.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.12053333333333334, "grad_norm": 0.0159810371696949, "learning_rate": 2.4166666666666667e-06, "loss": 0.0352, "num_tokens": 23674716.0, "reward": 1.3917934894561768, "reward_std": 0.254723459482193, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6875781416893005, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8920177817344666, "step": 113 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.610899014778325, "calib/avg_num_step_conf": 1.08203125, "calib/ece": 0.27968750000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027536945812807967, "calib/mean_conf": 0.27109374999999997, "calib/mu_c": 0.2835714285714286, "calib/mu_w": 0.2560344827586206, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001953125, "calib/std_conf": 0.05949309801092476, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1902.0, "completions/max_terminated_length": 1902.0, "completions/mean_length": 368.24609375, "completions/mean_terminated_length": 369.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1216, "grad_norm": 0.016689948737621307, "learning_rate": 2.388888888888889e-06, "loss": -0.0054, "num_tokens": 23849691.0, "reward": 1.4119011163711548, "reward_std": 0.20957492291927338, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6862499713897705, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8876047134399414, "step": 114 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6031255848774096, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.19330708661417323, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02889138436583677, "calib/mean_conf": 0.2673228346456693, "calib/mu_c": 0.2829059829059828, "calib/mu_w": 0.254014598540146, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.06927008442819448, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2621.0, "completions/max_terminated_length": 2621.0, "completions/mean_length": 434.8828125, "completions/mean_terminated_length": 434.8828125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.12266666666666666, "grad_norm": 0.023221217095851898, "learning_rate": 2.361111111111111e-06, "loss": 0.0444, "num_tokens": 24041965.0, "reward": 1.3284200429916382, "reward_std": 0.3041614294052124, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7142187356948853, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8711804747581482, "step": 115 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6542277825711821, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.2168627450980392, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.042253173918402526, "calib/mean_conf": 0.26156862745098036, "calib/mu_c": 0.28360655737704915, "calib/mu_w": 0.24135338345864663, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0663730558673844, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 987.0, "completions/max_terminated_length": 987.0, "completions/mean_length": 425.69140625, "completions/mean_terminated_length": 427.3608093261719, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.12373333333333333, "grad_norm": 0.012497956864535809, "learning_rate": 2.3333333333333336e-06, "loss": -0.0198, "num_tokens": 24231142.0, "reward": 1.3626313209533691, "reward_std": 0.2106991708278656, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7173047065734863, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8987287282943726, "step": 116 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6378205128205129, "calib/avg_num_step_conf": 1.078125, "calib/ece": 0.22695312500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032515262515262405, "calib/mean_conf": 0.265234375, "calib/mu_c": 0.28174603174603163, "calib/mu_w": 0.24923076923076923, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.06189033703543207, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1812.0, "completions/max_terminated_length": 1812.0, "completions/mean_length": 401.19140625, "completions/mean_terminated_length": 402.7647399902344, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.1248, "grad_norm": 0.0186613742262125, "learning_rate": 2.305555555555556e-06, "loss": 0.0198, "num_tokens": 24416127.0, "reward": 1.368299961090088, "reward_std": 0.23996347188949585, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.7074218988418579, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8942936658859253, "step": 117 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6276030534351145, "calib/avg_num_step_conf": 1.1171875, "calib/ece": 0.24062500000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.030876335877862537, "calib/mean_conf": 0.27499999999999997, "calib/mu_c": 0.2900763358778626, "calib/mu_w": 0.25920000000000004, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.001953125, "calib/std_conf": 0.059292706128157104, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1060.0, "completions/max_terminated_length": 1060.0, "completions/mean_length": 385.32421875, "completions/mean_terminated_length": 386.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.12586666666666665, "grad_norm": 0.013285689987242222, "learning_rate": 2.277777777777778e-06, "loss": -0.0066, "num_tokens": 24594458.0, "reward": 1.377181053161621, "reward_std": 0.2066250741481781, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6987109184265137, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8738024234771729, "step": 118 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.599326495304004, "calib/avg_num_step_conf": 1.11328125, "calib/ece": 0.27686274509803926, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024999999999999967, "calib/mean_conf": 0.26039215686274514, "calib/mu_c": 0.27205882352941174, "calib/mu_w": 0.24705882352941178, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00196078431372549, "calib/std_conf": 0.06230729615519736, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2266.0, "completions/max_terminated_length": 2266.0, "completions/mean_length": 425.3671875, "completions/mean_terminated_length": 427.0353088378906, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.12693333333333334, "grad_norm": 0.021426772698760033, "learning_rate": 2.25e-06, "loss": -0.0409, "num_tokens": 24784096.0, "reward": 1.3954346179962158, "reward_std": 0.24831436574459076, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6825000047683716, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8964260220527649, "step": 119 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.606035334815913, "calib/avg_num_step_conf": 1.11328125, "calib/ece": 0.312109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023881887818136938, "calib/mean_conf": 0.245703125, "calib/mu_c": 0.25633802816901413, "calib/mu_w": 0.2324561403508772, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015625, "calib/std_conf": 0.06540957013491508, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 385.70703125, "completions/mean_terminated_length": 387.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.128, "grad_norm": 0.013562988489866257, "learning_rate": 2.222222222222222e-06, "loss": -0.0012, "num_tokens": 24965205.0, "reward": 1.4110522270202637, "reward_std": 0.2052556872367859, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6650390625, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8953807353973389, "step": 120 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6352926587301587, "calib/avg_num_step_conf": 1.0390625, "calib/ece": 0.2633858267716537, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03164682539682537, "calib/mean_conf": 0.24055118110236223, "calib/mu_c": 0.25625, "calib/mu_w": 0.2246031746031746, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.060585166015491625, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 445.78125, "completions/mean_terminated_length": 449.2913513183594, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.12906666666666666, "grad_norm": 0.017739994451403618, "learning_rate": 2.1944444444444445e-06, "loss": 0.0258, "num_tokens": 25160061.0, "reward": 1.3621540069580078, "reward_std": 0.2875977158546448, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6838281154632568, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8950220346450806, "step": 121 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6609586881110059, "calib/avg_num_step_conf": 1.0859375, "calib/ece": 0.34316406250000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04185745821507403, "calib/mean_conf": 0.24707031249999997, "calib/mu_c": 0.26423841059602643, "calib/mu_w": 0.2223809523809524, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0001953125, "calib/std_conf": 0.06481050305430704, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1336.0, "completions/max_terminated_length": 1336.0, "completions/mean_length": 403.1484375, "completions/mean_terminated_length": 404.72943115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.13013333333333332, "grad_norm": 0.018619483336806297, "learning_rate": 2.166666666666667e-06, "loss": -0.0262, "num_tokens": 25346291.0, "reward": 1.436205506324768, "reward_std": 0.1859695315361023, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6566308736801147, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8721853494644165, "step": 122 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6331501831501831, "calib/avg_num_step_conf": 1.08203125, "calib/ece": 0.2750000000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03323565323565328, "calib/mean_conf": 0.23671874999999998, "calib/mu_c": 0.25307692307692314, "calib/mu_w": 0.21984126984126987, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001953125, "calib/std_conf": 0.06945220225764982, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1452.0, "completions/max_terminated_length": 1452.0, "completions/mean_length": 452.7421875, "completions/mean_terminated_length": 454.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.1312, "grad_norm": 0.011628609150648117, "learning_rate": 2.138888888888889e-06, "loss": 0.0318, "num_tokens": 25543161.0, "reward": 1.3704475164413452, "reward_std": 0.25859522819519043, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6883593797683716, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8738211989402771, "step": 123 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6389239706996717, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.337843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03546981561000259, "calib/mean_conf": 0.24254901960784314, "calib/mu_c": 0.25743243243243247, "calib/mu_w": 0.22196261682242988, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.06540501681468346, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 413.26171875, "completions/mean_terminated_length": 413.26171875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.13226666666666667, "grad_norm": 0.012678918428719044, "learning_rate": 2.1111111111111114e-06, "loss": 0.0195, "num_tokens": 25731452.0, "reward": 1.4276506900787354, "reward_std": 0.23279699683189392, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6527636647224426, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8972632884979248, "step": 124 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6089549413101097, "calib/avg_num_step_conf": 1.10546875, "calib/ece": 0.30355731225296445, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02757162690899917, "calib/mean_conf": 0.2458498023715415, "calib/mu_c": 0.2582733812949641, "calib/mu_w": 0.2307017543859649, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.07190634808706294, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2338.0, "completions/max_terminated_length": 2338.0, "completions/mean_length": 436.14453125, "completions/mean_terminated_length": 437.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.13333333333333333, "grad_norm": 0.010722563602030277, "learning_rate": 2.0833333333333334e-06, "loss": 0.0323, "num_tokens": 25923593.0, "reward": 1.3947930335998535, "reward_std": 0.2935622036457062, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.658007800579071, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8944070339202881, "step": 125 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6487941429801896, "calib/avg_num_step_conf": 1.12890625, "calib/ece": 0.257843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04203580657069031, "calib/mean_conf": 0.23627450980392156, "calib/mu_c": 0.25753968253968257, "calib/mu_w": 0.21550387596899226, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.07321632691360917, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1216.0, "completions/max_terminated_length": 1216.0, "completions/mean_length": 409.92578125, "completions/mean_terminated_length": 411.5333557128906, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.1344, "grad_norm": 0.012465385720133781, "learning_rate": 2.0555555555555555e-06, "loss": 0.0187, "num_tokens": 26109678.0, "reward": 1.3625491857528687, "reward_std": 0.18904824554920197, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6927245855331421, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8897475004196167, "step": 126 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.712732824427481, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.244140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.054442748091603016, "calib/mean_conf": 0.244140625, "calib/mu_c": 0.27199999999999996, "calib/mu_w": 0.21755725190839695, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07046749409911901, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1611.0, "completions/max_terminated_length": 1611.0, "completions/mean_length": 408.80859375, "completions/mean_terminated_length": 410.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.13546666666666668, "grad_norm": 0.021145053207874298, "learning_rate": 2.027777777777778e-06, "loss": 0.0046, "num_tokens": 26293685.0, "reward": 1.371145486831665, "reward_std": 0.19424743950366974, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7127734422683716, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9059106111526489, "step": 127 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6013173429441373, "calib/avg_num_step_conf": 1.046875, "calib/ece": 0.22322834645669293, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02987012987012977, "calib/mean_conf": 0.2570866141732283, "calib/mu_c": 0.2727272727272726, "calib/mu_w": 0.24285714285714283, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001968503937007874, "calib/std_conf": 0.07049443197111299, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3051.0, "completions/max_terminated_length": 3051.0, "completions/mean_length": 470.16796875, "completions/mean_terminated_length": 470.16796875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.13653333333333334, "grad_norm": 0.021324368193745613, "learning_rate": 2.0000000000000003e-06, "loss": 0.0349, "num_tokens": 26496392.0, "reward": 1.352662205696106, "reward_std": 0.2628103792667389, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.7068359851837158, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9001019597053528, "step": 128 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5622184684684683, "calib/avg_num_step_conf": 1.09765625, "calib/ece": 0.30784313725490203, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017736486486486513, "calib/mean_conf": 0.2568627450980392, "calib/mu_c": 0.26458333333333334, "calib/mu_w": 0.24684684684684682, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.0710009719921987, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 871.0, "completions/max_terminated_length": 871.0, "completions/mean_length": 368.359375, "completions/mean_terminated_length": 369.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.1376, "grad_norm": 0.014148689806461334, "learning_rate": 1.9722222222222224e-06, "loss": 0.0068, "num_tokens": 26668756.0, "reward": 1.4037660360336304, "reward_std": 0.2250811755657196, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6555860042572021, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8679548501968384, "step": 129 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6164713541666667, "calib/avg_num_step_conf": 1.09765625, "calib/ece": 0.37246093750000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03072916666666667, "calib/mean_conf": 0.2525390624999999, "calib/mu_c": 0.2640625, "calib/mu_w": 0.2333333333333333, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06572461515004172, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 375.1640625, "completions/mean_terminated_length": 376.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.13866666666666666, "grad_norm": 0.017018822953104973, "learning_rate": 1.944444444444445e-06, "loss": 0.0049, "num_tokens": 26845766.0, "reward": 1.4653195142745972, "reward_std": 0.13975851237773895, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6369824409484863, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.887313187122345, "step": 130 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7051282051282051, "calib/avg_num_step_conf": 1.13671875, "calib/ece": 0.12109375000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05919413919413921, "calib/mean_conf": 0.234375, "calib/mu_c": 0.27252747252747256, "calib/mu_w": 0.21333333333333335, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07749747979773276, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1217.0, "completions/max_terminated_length": 1217.0, "completions/mean_length": 405.03515625, "completions/mean_terminated_length": 406.6235656738281, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.13973333333333332, "grad_norm": 0.014553806744515896, "learning_rate": 1.916666666666667e-06, "loss": -0.0039, "num_tokens": 27031343.0, "reward": 1.2662906646728516, "reward_std": 0.1472969353199005, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.77734375, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.88860023021698, "step": 131 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6583458458458459, "calib/avg_num_step_conf": 1.078125, "calib/ece": 0.33019607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.046753003003003, "calib/mean_conf": 0.23450980392156862, "calib/mu_c": 0.25486111111111115, "calib/mu_w": 0.20810810810810815, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07608853783690175, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1798.0, "completions/max_terminated_length": 1798.0, "completions/mean_length": 403.84765625, "completions/mean_terminated_length": 405.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.1408, "grad_norm": 0.014779605902731419, "learning_rate": 1.888888888888889e-06, "loss": 0.0146, "num_tokens": 27216000.0, "reward": 1.4141907691955566, "reward_std": 0.27150774002075195, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6597656011581421, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8919193744659424, "step": 132 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6344942935852027, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.18023715415019764, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.039898989898989934, "calib/mean_conf": 0.21106719367588936, "calib/mu_c": 0.23535353535353537, "calib/mu_w": 0.19545454545454544, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.07974791948932293, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2233.0, "completions/max_terminated_length": 2233.0, "completions/mean_length": 474.875, "completions/mean_terminated_length": 480.5059509277344, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.14186666666666667, "grad_norm": 0.013779650442302227, "learning_rate": 1.8611111111111113e-06, "loss": -0.017, "num_tokens": 27419592.0, "reward": 1.2738828659057617, "reward_std": 0.2782604396343231, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.7297265529632568, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.907953143119812, "step": 133 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6899810366624526, "calib/avg_num_step_conf": 1.06640625, "calib/ece": 0.2434782608695652, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05345764854614407, "calib/mean_conf": 0.20316205533596837, "calib/mu_c": 0.23274336283185837, "calib/mu_w": 0.1792857142857143, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.07641860884361723, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1774.0, "completions/max_terminated_length": 1774.0, "completions/mean_length": 459.703125, "completions/mean_terminated_length": 463.3228454589844, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.14293333333333333, "grad_norm": 0.017993899062275887, "learning_rate": 1.8333333333333333e-06, "loss": 0.001, "num_tokens": 27621908.0, "reward": 1.319052815437317, "reward_std": 0.2571974992752075, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7057812809944153, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9130858778953552, "step": 134 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6095503875968993, "calib/avg_num_step_conf": 1.12109375, "calib/ece": 0.2751968503937008, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.031249612403100757, "calib/mean_conf": 0.2169291338582677, "calib/mu_c": 0.23279999999999998, "calib/mu_w": 0.20155038759689922, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.08028504151693379, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2629.0, "completions/max_terminated_length": 2629.0, "completions/mean_length": 436.69921875, "completions/mean_terminated_length": 438.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.144, "grad_norm": 0.021255752071738243, "learning_rate": 1.8055555555555557e-06, "loss": 0.0371, "num_tokens": 27815263.0, "reward": 1.3500044345855713, "reward_std": 0.25803160667419434, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6781641244888306, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8999398946762085, "step": 135 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6774089382588478, "calib/avg_num_step_conf": 1.19140625, "calib/ece": 0.16972656250000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.049438129682252585, "calib/mean_conf": 0.2205078125, "calib/mu_c": 0.25102040816326526, "calib/mu_w": 0.20158227848101268, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0037109375, "calib/std_conf": 0.07793311395975938, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1072.0, "completions/max_terminated_length": 1072.0, "completions/mean_length": 429.92578125, "completions/mean_terminated_length": 431.6117858886719, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.14506666666666668, "grad_norm": 0.025372860953211784, "learning_rate": 1.777777777777778e-06, "loss": -0.0017, "num_tokens": 28009492.0, "reward": 1.2842981815338135, "reward_std": 0.2165205031633377, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.7442480325698853, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9315086603164673, "step": 136 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5685860993948284, "calib/avg_num_step_conf": 1.12890625, "calib/ece": 0.319921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.016315178189375834, "calib/mean_conf": 0.221484375, "calib/mu_c": 0.22932330827067668, "calib/mu_w": 0.21300813008130084, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0109375, "calib/std_conf": 0.08731757629973116, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1803.0, "completions/max_terminated_length": 1803.0, "completions/mean_length": 425.12890625, "completions/mean_terminated_length": 426.7961120605469, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.14613333333333334, "grad_norm": 0.017735032364726067, "learning_rate": 1.75e-06, "loss": 0.001, "num_tokens": 28200989.0, "reward": 1.3787713050842285, "reward_std": 0.18992286920547485, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6583203077316284, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9250069856643677, "step": 137 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6439030796506665, "calib/avg_num_step_conf": 1.0625, "calib/ece": 0.41299212598425206, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04133560969203495, "calib/mean_conf": 0.20905511811023622, "calib/mu_c": 0.22484076433121022, "calib/mu_w": 0.18350515463917527, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.001968503937007874, "calib/std_conf": 0.08250920321720014, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 437.875, "completions/mean_terminated_length": 439.5921936035156, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.1472, "grad_norm": 0.014441309496760368, "learning_rate": 1.7222222222222224e-06, "loss": -0.0072, "num_tokens": 28393101.0, "reward": 1.4396114349365234, "reward_std": 0.23829461634159088, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6007031202316284, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.917976975440979, "step": 138 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6668275107765553, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.3953125000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05235797465096825, "calib/mean_conf": 0.21796875000000002, "calib/mu_c": 0.23821656050955417, "calib/mu_w": 0.18585858585858592, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.08332316018633414, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1193.0, "completions/max_terminated_length": 1193.0, "completions/mean_length": 411.86328125, "completions/mean_terminated_length": 413.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.14826666666666666, "grad_norm": 0.0161239393055439, "learning_rate": 1.6944444444444446e-06, "loss": 0.0096, "num_tokens": 28577314.0, "reward": 1.4570918083190918, "reward_std": 0.17924131453037262, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6244531273841858, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9263359904289246, "step": 139 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.620045045045045, "calib/avg_num_step_conf": 1.11328125, "calib/ece": 0.3697628458498024, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03578185328185324, "calib/mean_conf": 0.21521739130434783, "calib/mu_c": 0.23006756756756755, "calib/mu_w": 0.1942857142857143, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.0815829809491323, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 925.0, "completions/max_terminated_length": 925.0, "completions/mean_length": 420.44921875, "completions/mean_terminated_length": 422.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.14933333333333335, "grad_norm": 0.015183106064796448, "learning_rate": 1.6666666666666667e-06, "loss": 0.0102, "num_tokens": 28765645.0, "reward": 1.4175596237182617, "reward_std": 0.22534862160682678, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6238183975219727, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9085395336151123, "step": 140 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7487771739130435, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.41130952380952385, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0859782608695652, "calib/mean_conf": 0.22361111111111115, "calib/mu_c": 0.255, "calib/mu_w": 0.1690217391304348, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.08823700697753031, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2652.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 508.20703125, "completions/mean_terminated_length": 508.20703125, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.1504, "grad_norm": 0.011680899187922478, "learning_rate": 1.638888888888889e-06, "loss": 0.0494, "num_tokens": 28978522.0, "reward": 1.4483433961868286, "reward_std": 0.22178013622760773, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6166308522224426, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8882371187210083, "step": 141 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6945656238314845, "calib/avg_num_step_conf": 1.1875, "calib/ece": 0.33125490196078433, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0649295774647887, "calib/mean_conf": 0.22615686274509805, "calib/mu_c": 0.2549295774647887, "calib/mu_w": 0.19000000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0002745098039215687, "calib/std_conf": 0.08723270041406593, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 456.234375, "completions/mean_terminated_length": 458.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.15146666666666667, "grad_norm": 0.026742398738861084, "learning_rate": 1.6111111111111113e-06, "loss": 0.0046, "num_tokens": 29176158.0, "reward": 1.4205234050750732, "reward_std": 0.22015342116355896, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6656917929649353, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9366475343704224, "step": 142 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7243349753694581, "calib/avg_num_step_conf": 1.078125, "calib/ece": 0.33160000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07523809523809522, "calib/mean_conf": 0.24839999999999998, "calib/mu_c": 0.27999999999999997, "calib/mu_w": 0.20476190476190476, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.08400857099129826, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2764.0, "completions/max_terminated_length": 2764.0, "completions/mean_length": 454.6171875, "completions/mean_terminated_length": 454.6171875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.15253333333333333, "grad_norm": 0.012399648316204548, "learning_rate": 1.5833333333333333e-06, "loss": 0.0181, "num_tokens": 29375556.0, "reward": 1.413042426109314, "reward_std": 0.2350015789270401, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.656445324420929, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8908417224884033, "step": 143 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6306699958385352, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.4079681274900399, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04504785684560961, "calib/mean_conf": 0.2414342629482072, "calib/mu_c": 0.2574074074074074, "calib/mu_w": 0.2123595505617978, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.00199203187250996, "calib/std_conf": 0.08721187429063451, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2071.0, "completions/max_terminated_length": 2071.0, "completions/mean_length": 452.03125, "completions/mean_terminated_length": 455.5905456542969, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.1536, "grad_norm": 0.015421283431351185, "learning_rate": 1.5555555555555558e-06, "loss": 0.0031, "num_tokens": 29571084.0, "reward": 1.446988582611084, "reward_std": 0.2860656678676605, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6020312309265137, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8854540586471558, "step": 144 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5864285714285714, "calib/avg_num_step_conf": 1.20703125, "calib/ece": 0.3535433070866142, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02918181818181817, "calib/mean_conf": 0.2566929133858268, "calib/mu_c": 0.2681818181818182, "calib/mu_w": 0.23900000000000002, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.001968503937007874, "calib/std_conf": 0.08237665671673708, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 448.1484375, "completions/mean_terminated_length": 448.1484375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.15466666666666667, "grad_norm": 0.020210454240441322, "learning_rate": 1.527777777777778e-06, "loss": 0.0296, "num_tokens": 29764194.0, "reward": 1.4496335983276367, "reward_std": 0.22999891638755798, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6411718130111694, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9193153381347656, "step": 145 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6356532742802912, "calib/avg_num_step_conf": 1.08203125, "calib/ece": 0.18149606299212603, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04190446061372982, "calib/mean_conf": 0.24763779527559054, "calib/mu_c": 0.2715596330275229, "calib/mu_w": 0.22965517241379307, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.07870963975533606, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2552.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 479.94921875, "completions/mean_terminated_length": 479.94921875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.15573333333333333, "grad_norm": 0.019039005041122437, "learning_rate": 1.5e-06, "loss": 0.0385, "num_tokens": 29969957.0, "reward": 1.3201770782470703, "reward_std": 0.22056898474693298, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.730664074420929, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.925629734992981, "step": 146 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6030879307098475, "calib/avg_num_step_conf": 1.17578125, "calib/ece": 0.21594488188976374, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03393899453963467, "calib/mean_conf": 0.2395669291338583, "calib/mu_c": 0.25840707964601767, "calib/mu_w": 0.224468085106383, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00531496062992126, "calib/std_conf": 0.09146343794719812, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1561.0, "completions/max_terminated_length": 1561.0, "completions/mean_length": 499.375, "completions/mean_terminated_length": 499.375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.1568, "grad_norm": 0.015556562691926956, "learning_rate": 1.4722222222222225e-06, "loss": -0.0014, "num_tokens": 30177157.0, "reward": 1.3237640857696533, "reward_std": 0.19050589203834534, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7136621475219727, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9114823937416077, "step": 147 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.605428279585583, "calib/avg_num_step_conf": 1.1171875, "calib/ece": 0.4505882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.038246023639282006, "calib/mean_conf": 0.2513725490196078, "calib/mu_c": 0.26292134831460673, "calib/mu_w": 0.22467532467532472, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00196078431372549, "calib/std_conf": 0.08715592535723, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1449.0, "completions/max_terminated_length": 1449.0, "completions/mean_length": 460.58984375, "completions/mean_terminated_length": 462.3961181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.15786666666666666, "grad_norm": 0.025586625561118126, "learning_rate": 1.4444444444444445e-06, "loss": 0.0233, "num_tokens": 30375860.0, "reward": 1.5209028720855713, "reward_std": 0.19411450624465942, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.5958983898162842, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9152524471282959, "step": 148 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6476557246775475, "calib/avg_num_step_conf": 1.30078125, "calib/ece": 0.29023437500000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04970352711045911, "calib/mean_conf": 0.23476562499999998, "calib/mu_c": 0.2586466165413534, "calib/mu_w": 0.2089430894308943, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002734375000000001, "calib/std_conf": 0.09015147707253263, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1677.0, "completions/max_terminated_length": 1677.0, "completions/mean_length": 506.140625, "completions/mean_terminated_length": 508.1255187988281, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.15893333333333334, "grad_norm": 0.014532111585140228, "learning_rate": 1.4166666666666667e-06, "loss": -0.0186, "num_tokens": 30585568.0, "reward": 1.389301061630249, "reward_std": 0.25378724932670593, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6859765648841858, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9071261286735535, "step": 149 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5439709595959595, "calib/avg_num_step_conf": 1.19921875, "calib/ece": 0.26567460317460323, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013939393939393974, "calib/mean_conf": 0.2748015873015873, "calib/mu_c": 0.28143939393939393, "calib/mu_w": 0.26749999999999996, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.008333333333333338, "calib/std_conf": 0.07695829831811614, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1974.0, "completions/max_terminated_length": 1974.0, "completions/mean_length": 438.8046875, "completions/mean_terminated_length": 440.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.16, "grad_norm": 0.018813475966453552, "learning_rate": 1.3888888888888892e-06, "loss": 0.0019, "num_tokens": 30778542.0, "reward": 1.364441156387329, "reward_std": 0.25788232684135437, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6683886647224426, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8913000822067261, "step": 150 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.653863010267969, "calib/avg_num_step_conf": 1.08984375, "calib/ece": 0.22766798418972337, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04400826446280984, "calib/mean_conf": 0.25059288537549407, "calib/mu_c": 0.2735537190082644, "calib/mu_w": 0.22954545454545458, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.08130387705202266, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2698.0, "completions/max_terminated_length": 2698.0, "completions/mean_length": 543.51953125, "completions/mean_terminated_length": 545.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.16106666666666666, "grad_norm": 0.023138921707868576, "learning_rate": 1.3611111111111112e-06, "loss": 0.0396, "num_tokens": 31000387.0, "reward": 1.333712100982666, "reward_std": 0.24264037609100342, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6918749809265137, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8932860493659973, "step": 151 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6129373019325173, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.22480314960629916, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03603430062760202, "calib/mean_conf": 0.2555118110236221, "calib/mu_c": 0.2743801652892562, "calib/mu_w": 0.23834586466165417, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0019685039370078744, "calib/std_conf": 0.08889707984732463, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2537.0, "completions/max_terminated_length": 2537.0, "completions/mean_length": 524.52734375, "completions/mean_terminated_length": 526.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.16213333333333332, "grad_norm": 0.02300630323588848, "learning_rate": 1.3333333333333334e-06, "loss": -0.0155, "num_tokens": 31215738.0, "reward": 1.3442671298980713, "reward_std": 0.2782951593399048, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6958593726158142, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.9181627035140991, "step": 152 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5876557708618777, "calib/avg_num_step_conf": 1.05078125, "calib/ece": 0.2848790322580645, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03167286487897181, "calib/mean_conf": 0.24536290322580642, "calib/mu_c": 0.26030534351145046, "calib/mu_w": 0.22863247863247865, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.0010080645161290335, "calib/std_conf": 0.08548173118137481, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2081.0, "completions/max_terminated_length": 2081.0, "completions/mean_length": 523.2265625, "completions/mean_terminated_length": 529.4308471679688, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.1632, "grad_norm": 0.0198051817715168, "learning_rate": 1.3055555555555556e-06, "loss": 0.006, "num_tokens": 31432684.0, "reward": 1.3532977104187012, "reward_std": 0.2662625014781952, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6515527963638306, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8944602608680725, "step": 153 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6288372093023256, "calib/avg_num_step_conf": 1.0390625, "calib/ece": 0.2688976377952756, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.043665116279069716, "calib/mean_conf": 0.23897637795275592, "calib/mu_c": 0.26046511627906976, "calib/mu_w": 0.21680000000000005, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.08748754373132563, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 467.68359375, "completions/mean_terminated_length": 471.36614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.16426666666666667, "grad_norm": 0.015027433633804321, "learning_rate": 1.2777777777777779e-06, "loss": -0.0261, "num_tokens": 31632531.0, "reward": 1.3765439987182617, "reward_std": 0.20490823686122894, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6865234375, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9268794059753418, "step": 154 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5754440578648599, "calib/avg_num_step_conf": 1.16015625, "calib/ece": 0.26503906250000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025566135628395298, "calib/mean_conf": 0.23886718749999997, "calib/mu_c": 0.25155038759689924, "calib/mu_w": 0.22598425196850394, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0871818207015651, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1521.0, "completions/max_terminated_length": 1521.0, "completions/mean_length": 457.54296875, "completions/mean_terminated_length": 459.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.16533333333333333, "grad_norm": 0.016778098419308662, "learning_rate": 1.25e-06, "loss": -0.0047, "num_tokens": 31832558.0, "reward": 1.374552607536316, "reward_std": 0.1923726499080658, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6812012195587158, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9248706102371216, "step": 155 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6032649253731344, "calib/avg_num_step_conf": 1.17578125, "calib/ece": 0.2856299212598426, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.033669154228855774, "calib/mean_conf": 0.24192913385826773, "calib/mu_c": 0.2578358208955224, "calib/mu_w": 0.22416666666666665, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.08651054108111587, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 474.0703125, "completions/mean_terminated_length": 475.929443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1664, "grad_norm": 0.020775113254785538, "learning_rate": 1.2222222222222223e-06, "loss": -0.0068, "num_tokens": 32034360.0, "reward": 1.3807235956192017, "reward_std": 0.24285456538200378, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6656738519668579, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9165468215942383, "step": 156 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5783989092325672, "calib/avg_num_step_conf": 1.09765625, "calib/ece": 0.37075098814229246, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021821841319309226, "calib/mean_conf": 0.23557312252964427, "calib/mu_c": 0.24437086092715235, "calib/mu_w": 0.22254901960784312, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0047430830039525695, "calib/std_conf": 0.08434452912789969, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2779.0, "completions/max_terminated_length": 2779.0, "completions/mean_length": 471.4765625, "completions/mean_terminated_length": 475.18896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.16746666666666668, "grad_norm": 0.019724013283848763, "learning_rate": 1.1944444444444446e-06, "loss": 0.0233, "num_tokens": 32234466.0, "reward": 1.4214212894439697, "reward_std": 0.22582949697971344, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6209765672683716, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9031072854995728, "step": 157 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5811827956989247, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.3725099601593626, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.028750000000000026, "calib/mean_conf": 0.24900398406374502, "calib/mu_c": 0.26, "calib/mu_w": 0.23124999999999998, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0019920318725099606, "calib/std_conf": 0.08942603603191979, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 486.0390625, "completions/mean_terminated_length": 491.8023986816406, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.16853333333333334, "grad_norm": 0.01671520248055458, "learning_rate": 1.1666666666666668e-06, "loss": -0.0134, "num_tokens": 32439812.0, "reward": 1.4309093952178955, "reward_std": 0.27989810705184937, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6212109327316284, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8827787637710571, "step": 158 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.637247615508485, "calib/avg_num_step_conf": 1.0625, "calib/ece": 0.2913725490196079, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04466740988480117, "calib/mean_conf": 0.25921568627450986, "calib/mu_c": 0.2797101449275362, "calib/mu_w": 0.23504273504273504, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0047058823529411795, "calib/std_conf": 0.08058227161751418, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2902.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 462.94921875, "completions/mean_terminated_length": 464.7647399902344, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.1696, "grad_norm": 0.02069372683763504, "learning_rate": 1.138888888888889e-06, "loss": -0.0197, "num_tokens": 32638791.0, "reward": 1.40556001663208, "reward_std": 0.2604949176311493, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.67808598279953, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9238807559013367, "step": 159 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6567470026914607, "calib/avg_num_step_conf": 1.0859375, "calib/ece": 0.2943359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05235502813799853, "calib/mean_conf": 0.2302734375, "calib/mu_c": 0.25522388059701495, "calib/mu_w": 0.20286885245901642, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0005859375000000001, "calib/std_conf": 0.08734094605014073, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1568.0, "completions/max_terminated_length": 1568.0, "completions/mean_length": 464.828125, "completions/mean_terminated_length": 466.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.17066666666666666, "grad_norm": 0.01723591983318329, "learning_rate": 1.111111111111111e-06, "loss": 0.021, "num_tokens": 32838307.0, "reward": 1.3904207944869995, "reward_std": 0.2143954634666443, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6823534965515137, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9079135656356812, "step": 160 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5613266583229035, "calib/avg_num_step_conf": 1.1484375, "calib/ece": 0.47847656250000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01935857321652054, "calib/mean_conf": 0.25980468749999996, "calib/mu_c": 0.2649468085106383, "calib/mu_w": 0.24558823529411775, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001953125, "calib/std_conf": 0.0822175618589322, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1818.0, "completions/max_terminated_length": 1818.0, "completions/mean_length": 454.125, "completions/mean_terminated_length": 455.9059143066406, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.17173333333333332, "grad_norm": 0.013422448188066483, "learning_rate": 1.0833333333333335e-06, "loss": 0.0223, "num_tokens": 33034163.0, "reward": 1.54856538772583, "reward_std": 0.1663738191127777, "rewards/accuracy_reward_step": 0.734375, "rewards/final_brier_reward_step": 0.5785152912139893, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9044186472892761, "step": 161 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6401791135397692, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.46156862745098043, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.051047358834244105, "calib/mean_conf": 0.25607843137254904, "calib/mu_c": 0.27049180327868855, "calib/mu_w": 0.21944444444444444, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09305445235678816, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1722.0, "completions/max_terminated_length": 1722.0, "completions/mean_length": 428.890625, "completions/mean_terminated_length": 430.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1728, "grad_norm": 0.019668465480208397, "learning_rate": 1.0555555555555557e-06, "loss": 0.0157, "num_tokens": 33223783.0, "reward": 1.5355653762817383, "reward_std": 0.1955740749835968, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.5876562595367432, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9216366410255432, "step": 162 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7115226337448559, "calib/avg_num_step_conf": 1.1015625, "calib/ece": 0.28591269841269845, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0785470085470085, "calib/mean_conf": 0.2501984126984127, "calib/mu_c": 0.2866666666666667, "calib/mu_w": 0.20811965811965819, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0001984126984126984, "calib/std_conf": 0.10555014755211915, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2325.0, "completions/max_terminated_length": 2325.0, "completions/mean_length": 521.4453125, "completions/mean_terminated_length": 529.7222290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.17386666666666667, "grad_norm": 0.014891761355102062, "learning_rate": 1.0277777777777777e-06, "loss": -0.0231, "num_tokens": 33437785.0, "reward": 1.3962335586547852, "reward_std": 0.22696751356124878, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6867871284484863, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9207350611686707, "step": 163 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6489661654135338, "calib/avg_num_step_conf": 1.14453125, "calib/ece": 0.28715415019762847, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05587092731829579, "calib/mean_conf": 0.23853754940711466, "calib/mu_c": 0.26503759398496246, "calib/mu_w": 0.20916666666666667, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.1040059076552682, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 555.09765625, "completions/mean_terminated_length": 561.6798706054688, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.17493333333333333, "grad_norm": 0.016471445560455322, "learning_rate": 1.0000000000000002e-06, "loss": -0.0415, "num_tokens": 33661706.0, "reward": 1.3797190189361572, "reward_std": 0.20705097913742065, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6772167682647705, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.90038001537323, "step": 164 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6223961543202268, "calib/avg_num_step_conf": 1.15234375, "calib/ece": 0.2605882352941178, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04616356464932825, "calib/mean_conf": 0.2613725490196079, "calib/mu_c": 0.28345864661654135, "calib/mu_w": 0.2372950819672131, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00019607843137254904, "calib/std_conf": 0.10407424966110544, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1956.0, "completions/max_terminated_length": 1956.0, "completions/mean_length": 550.890625, "completions/mean_terminated_length": 553.051025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.176, "grad_norm": 0.011542506515979767, "learning_rate": 9.722222222222224e-07, "loss": -0.0159, "num_tokens": 33883990.0, "reward": 1.3936247825622559, "reward_std": 0.17919129133224487, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6849511861801147, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9405341744422913, "step": 165 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6533776301218162, "calib/avg_num_step_conf": 1.09765625, "calib/ece": 0.3903543307086615, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.057336655592469504, "calib/mean_conf": 0.2710629921259842, "calib/mu_c": 0.29047619047619044, "calib/mu_w": 0.23313953488372094, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.09880811341224238, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2063.0, "completions/max_terminated_length": 2063.0, "completions/mean_length": 550.6171875, "completions/mean_terminated_length": 554.9527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.17706666666666668, "grad_norm": 0.013915646821260452, "learning_rate": 9.444444444444445e-07, "loss": -0.0148, "num_tokens": 34106812.0, "reward": 1.4977242946624756, "reward_std": 0.21183454990386963, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6308495998382568, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9182603359222412, "step": 166 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5557319784058432, "calib/avg_num_step_conf": 1.08203125, "calib/ece": 0.45333333333333337, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02475389012384882, "calib/mean_conf": 0.283921568627451, "calib/mu_c": 0.2904255319148936, "calib/mu_w": 0.2656716417910448, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.10744954915319879, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1595.0, "completions/max_terminated_length": 1595.0, "completions/mean_length": 527.37109375, "completions/mean_terminated_length": 529.4392700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.17813333333333334, "grad_norm": 0.021476641297340393, "learning_rate": 9.166666666666666e-07, "loss": 0.0531, "num_tokens": 34323107.0, "reward": 1.5472520589828491, "reward_std": 0.21065786480903625, "rewards/accuracy_reward_step": 0.734375, "rewards/final_brier_reward_step": 0.5907421708106995, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8840864896774292, "step": 167 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6603872135942614, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.39570312500000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0632286370917195, "calib/mean_conf": 0.280078125, "calib/mu_c": 0.30057803468208094, "calib/mu_w": 0.23734939759036144, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.11160054388973369, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 616.66015625, "completions/mean_terminated_length": 619.0784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.1792, "grad_norm": 0.013876114040613174, "learning_rate": 8.88888888888889e-07, "loss": 0.0211, "num_tokens": 34561324.0, "reward": 1.5183846950531006, "reward_std": 0.21503406763076782, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6360155940055847, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.903070330619812, "step": 168 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.602853048462741, "calib/avg_num_step_conf": 1.046875, "calib/ece": 0.29150197628458496, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03785500260552371, "calib/mean_conf": 0.31482213438735174, "calib/mu_c": 0.3299342105263158, "calib/mu_w": 0.29207920792079206, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0027667984189723317, "calib/std_conf": 0.11258579505958703, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 559.39453125, "completions/mean_terminated_length": 561.5882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.18026666666666666, "grad_norm": 0.01111834030598402, "learning_rate": 8.611111111111112e-07, "loss": -0.0282, "num_tokens": 34784393.0, "reward": 1.4540972709655762, "reward_std": 0.20363430678844452, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6738574504852295, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8967991471290588, "step": 169 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6974222046413502, "calib/avg_num_step_conf": 1.0625, "calib/ece": 0.30314960629921267, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08229166666666665, "calib/mean_conf": 0.3188976377952756, "calib/mu_c": 0.35, "calib/mu_w": 0.2677083333333333, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.10884669813644965, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1629.0, "completions/max_terminated_length": 1629.0, "completions/mean_length": 568.24609375, "completions/mean_terminated_length": 570.4745483398438, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.18133333333333335, "grad_norm": 0.010557408444583416, "learning_rate": 8.333333333333333e-07, "loss": -0.0077, "num_tokens": 35009696.0, "reward": 1.4908766746520996, "reward_std": 0.18308508396148682, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6905077695846558, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9121789932250977, "step": 170 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6503187078408317, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.23945312500000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06436660684448287, "calib/mean_conf": 0.31914062500000007, "calib/mu_c": 0.34755244755244746, "calib/mu_w": 0.2831858407079646, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.10997533348260136, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 558.64453125, "completions/mean_terminated_length": 560.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 230.0, "epoch": 0.1824, "grad_norm": 0.012995216064155102, "learning_rate": 8.055555555555557e-07, "loss": -0.0082, "num_tokens": 35235285.0, "reward": 1.4445995092391968, "reward_std": 0.24250145256519318, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7086328268051147, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9361324906349182, "step": 171 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6476407346573092, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.36156862745098045, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05843661340898909, "calib/mean_conf": 0.34823529411764703, "calib/mu_c": 0.3651933701657458, "calib/mu_w": 0.30675675675675673, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09735131211242885, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2385.0, "completions/max_terminated_length": 2385.0, "completions/mean_length": 539.7421875, "completions/mean_terminated_length": 541.8588256835938, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.18346666666666667, "grad_norm": 0.019643718376755714, "learning_rate": 7.777777777777779e-07, "loss": 0.0121, "num_tokens": 35452491.0, "reward": 1.5684814453125, "reward_std": 0.19160641729831696, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.6713671684265137, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9124413132667542, "step": 172 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4669896193771626, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.3266666666666667, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011470588235294121, "calib/mean_conf": 0.34, "calib/mu_c": 0.33617647058823524, "calib/mu_w": 0.34764705882352936, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.10245993956511139, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1751.0, "completions/max_terminated_length": 1751.0, "completions/mean_length": 617.77734375, "completions/mean_terminated_length": 620.2000122070312, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.18453333333333333, "grad_norm": 0.011911396868526936, "learning_rate": 7.5e-07, "loss": 0.0077, "num_tokens": 35689482.0, "reward": 1.5004022121429443, "reward_std": 0.2213282287120819, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6490429639816284, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8566482067108154, "step": 173 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4596725288053366, "calib/avg_num_step_conf": 1.203125, "calib/ece": 0.31200000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01675089279698133, "calib/mean_conf": 0.32480000000000003, "calib/mu_c": 0.3183006535947712, "calib/mu_w": 0.33505154639175255, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.0124, "calib/std_conf": 0.11038550629498423, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2894.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 686.48046875, "completions/mean_terminated_length": 691.8858032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.1856, "grad_norm": 0.010533534921705723, "learning_rate": 7.222222222222222e-07, "loss": -0.005, "num_tokens": 35945133.0, "reward": 1.416163444519043, "reward_std": 0.29552051424980164, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6329687833786011, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8549661636352539, "step": 174 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6765468091998704, "calib/avg_num_step_conf": 1.04296875, "calib/ece": 0.09603174603174608, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06802721088435382, "calib/mean_conf": 0.33174603174603173, "calib/mu_c": 0.37142857142857144, "calib/mu_w": 0.3034013605442176, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.005555555555555572, "calib/std_conf": 0.10775545038385559, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2986.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 638.01953125, "completions/mean_terminated_length": 643.0433349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.18666666666666668, "grad_norm": 0.009028359316289425, "learning_rate": 6.944444444444446e-07, "loss": -0.0385, "num_tokens": 36189970.0, "reward": 1.3047478199005127, "reward_std": 0.20292183756828308, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.7562109231948853, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8893823027610779, "step": 175 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5794956968141326, "calib/avg_num_step_conf": 1.03515625, "calib/ece": 0.3505928853754941, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03269666314359049, "calib/mean_conf": 0.3569169960474308, "calib/mu_c": 0.36648044692737425, "calib/mu_w": 0.33378378378378376, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.0965899866518836, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1773.0, "completions/max_terminated_length": 1773.0, "completions/mean_length": 575.5625, "completions/mean_terminated_length": 580.094482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.18773333333333334, "grad_norm": 0.012131310068070889, "learning_rate": 6.666666666666667e-07, "loss": -0.0189, "num_tokens": 36417058.0, "reward": 1.5568971633911133, "reward_std": 0.19418621063232422, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.6644531488418579, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9205576181411743, "step": 176 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6526360544217688, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.2535433070866142, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05884353741496601, "calib/mean_conf": 0.36062992125984256, "calib/mu_c": 0.3833333333333333, "calib/mu_w": 0.3244897959183673, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09319987860740346, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1413.0, "completions/max_terminated_length": 1413.0, "completions/mean_length": 570.96875, "completions/mean_terminated_length": 573.2078857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.1888, "grad_norm": 0.016085185110569, "learning_rate": 6.388888888888889e-07, "loss": 0.014, "num_tokens": 36642738.0, "reward": 1.486448049545288, "reward_std": 0.20098945498466492, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.712343692779541, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8929798007011414, "step": 177 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6869550173010381, "calib/avg_num_step_conf": 1.109375, "calib/ece": 0.3121568627450981, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07117647058823534, "calib/mean_conf": 0.35450980392156867, "calib/mu_c": 0.37823529411764706, "calib/mu_w": 0.3070588235294117, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.0956364383237669, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1244.0, "completions/max_terminated_length": 1244.0, "completions/mean_length": 575.24609375, "completions/mean_terminated_length": 577.5020141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.18986666666666666, "grad_norm": 0.020766116678714752, "learning_rate": 6.111111111111112e-07, "loss": 0.016, "num_tokens": 36871753.0, "reward": 1.5327744483947754, "reward_std": 0.22211039066314697, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6946484446525574, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8996132612228394, "step": 178 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6402533521999865, "calib/avg_num_step_conf": 1.046875, "calib/ece": 0.25960000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0567818880129371, "calib/mean_conf": 0.3564, "calib/mu_c": 0.37843137254901954, "calib/mu_w": 0.32164948453608244, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.002, "calib/std_conf": 0.09065892123779104, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 621.015625, "completions/mean_terminated_length": 628.3794555664062, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.19093333333333334, "grad_norm": 0.010622275993227959, "learning_rate": 5.833333333333334e-07, "loss": -0.009, "num_tokens": 37112677.0, "reward": 1.4636054039001465, "reward_std": 0.2516264021396637, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6956249475479126, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.9053595066070557, "step": 179 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5907735262389746, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.23593750000000008, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03666476299257554, "calib/mean_conf": 0.3617187500000001, "calib/mu_c": 0.37647058823529406, "calib/mu_w": 0.3398058252427185, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.09692224150543309, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 623.40234375, "completions/mean_terminated_length": 625.8471069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.192, "grad_norm": 0.012773178517818451, "learning_rate": 5.555555555555555e-07, "loss": -0.0161, "num_tokens": 37351804.0, "reward": 1.4770622253417969, "reward_std": 0.1948680281639099, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7088280916213989, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.904654860496521, "step": 180 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6617485277939559, "calib/avg_num_step_conf": 1.0234375, "calib/ece": 0.25078740157480317, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06619426648547211, "calib/mean_conf": 0.3547244094488189, "calib/mu_c": 0.3810457516339869, "calib/mu_w": 0.3148514851485148, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0015748031496062992, "calib/std_conf": 0.10289760043767955, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 570.703125, "completions/mean_terminated_length": 572.9412231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.19306666666666666, "grad_norm": 0.011856647208333015, "learning_rate": 5.277777777777779e-07, "loss": 0.0644, "num_tokens": 37579848.0, "reward": 1.4679954051971436, "reward_std": 0.2306535542011261, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7007812261581421, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.9079186916351318, "step": 181 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6935108856478546, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.3135433070866142, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07587965898682447, "calib/mean_conf": 0.36, "calib/mu_c": 0.38479532163742686, "calib/mu_w": 0.3089156626506024, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00015748031496062991, "calib/std_conf": 0.09142786838874807, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 613.7734375, "completions/mean_terminated_length": 613.7734375, "completions/min_length": 230.0, "completions/min_terminated_length": 230.0, "epoch": 0.19413333333333332, "grad_norm": 0.014810623601078987, "learning_rate": 5.000000000000001e-07, "loss": 0.0274, "num_tokens": 37818814.0, "reward": 1.5463969707489014, "reward_std": 0.17773394286632538, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7013999819755554, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9202882051467896, "step": 182 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.628010878010878, "calib/avg_num_step_conf": 1.0625, "calib/ece": 0.24274509803921573, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.046037296037296005, "calib/mean_conf": 0.37058823529411766, "calib/mu_c": 0.3884615384615384, "calib/mu_w": 0.3424242424242424, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0007843137254901962, "calib/std_conf": 0.09475275907216514, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 601.56640625, "completions/mean_terminated_length": 603.925537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.1952, "grad_norm": 0.016529878601431847, "learning_rate": 4.7222222222222226e-07, "loss": 0.0203, "num_tokens": 38055175.0, "reward": 1.4892892837524414, "reward_std": 0.28105586767196655, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7111327648162842, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.891141414642334, "step": 183 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6555634301913537, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.3063241106719368, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05875974486180008, "calib/mean_conf": 0.3671936758893281, "calib/mu_c": 0.38647058823529407, "calib/mu_w": 0.327710843373494, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0007905138339920949, "calib/std_conf": 0.08706245792854107, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1799.0, "completions/max_terminated_length": 1799.0, "completions/mean_length": 602.84375, "completions/mean_terminated_length": 607.590576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 262.0, "epoch": 0.19626666666666667, "grad_norm": 0.010777152143418789, "learning_rate": 4.444444444444445e-07, "loss": -0.0174, "num_tokens": 38290463.0, "reward": 1.5306932926177979, "reward_std": 0.21238690614700317, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.6872656345367432, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.904492199420929, "step": 184 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.7062724014336916, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.276326530612245, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.08168458781362004, "calib/mean_conf": 0.3661224489795919, "calib/mu_c": 0.39612903225806445, "calib/mu_w": 0.3144444444444444, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.004897959183673465, "calib/std_conf": 0.09321505367019424, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 650.30078125, "completions/mean_terminated_length": 655.4212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.19733333333333333, "grad_norm": 0.01256662979722023, "learning_rate": 4.1666666666666667e-07, "loss": 0.0048, "num_tokens": 38539540.0, "reward": 1.4508123397827148, "reward_std": 0.222275048494339, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6873437166213989, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.8676245808601379, "step": 185 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6813147295742232, "calib/avg_num_step_conf": 0.9921875, "calib/ece": 0.3235294117647059, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07091484464902187, "calib/mean_conf": 0.36666666666666664, "calib/mu_c": 0.38863636363636367, "calib/mu_w": 0.3177215189873418, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.09344531097808413, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2861.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 607.05859375, "completions/mean_terminated_length": 607.05859375, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.1984, "grad_norm": 0.009213064797222614, "learning_rate": 3.8888888888888895e-07, "loss": 0.0548, "num_tokens": 38775667.0, "reward": 1.559600591659546, "reward_std": 0.18579980731010437, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6948046684265137, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9128557443618774, "step": 186 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6369402985074628, "calib/avg_num_step_conf": 1.05078125, "calib/ece": 0.16181102362204727, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05512437810945264, "calib/mean_conf": 0.365748031496063, "calib/mu_c": 0.39179104477611937, "calib/mu_w": 0.3366666666666667, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09457754960785239, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2285.0, "completions/max_terminated_length": 2285.0, "completions/mean_length": 634.3359375, "completions/mean_terminated_length": 639.3306884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.19946666666666665, "grad_norm": 0.009206795133650303, "learning_rate": 3.611111111111111e-07, "loss": -0.0023, "num_tokens": 39015281.0, "reward": 1.4204821586608887, "reward_std": 0.2516452372074127, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7373046875, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.9229444265365601, "step": 187 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6273310023310024, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.3295275590551181, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05447261072261067, "calib/mean_conf": 0.36338582677165354, "calib/mu_c": 0.3801136363636363, "calib/mu_w": 0.32564102564102565, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09410766553530815, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1914.0, "completions/max_terminated_length": 1914.0, "completions/mean_length": 625.09765625, "completions/mean_terminated_length": 627.549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.20053333333333334, "grad_norm": 0.009032557718455791, "learning_rate": 3.3333333333333335e-07, "loss": 0.024, "num_tokens": 39255058.0, "reward": 1.5570321083068848, "reward_std": 0.19003382325172424, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.6839843392372131, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9085971117019653, "step": 188 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5884168362991661, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.24724409448818907, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.029588285507912482, "calib/mean_conf": 0.3708661417322835, "calib/mu_c": 0.38216560509554137, "calib/mu_w": 0.3525773195876289, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.08927550582456467, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2851.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 600.6015625, "completions/mean_terminated_length": 602.9569091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 256.0, "epoch": 0.2016, "grad_norm": 0.00952502153813839, "learning_rate": 3.055555555555556e-07, "loss": -0.0013, "num_tokens": 39492260.0, "reward": 1.4871289730072021, "reward_std": 0.22185944020748138, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7032812833786011, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8982037305831909, "step": 189 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5954418725280426, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.2550781250000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03285353044154826, "calib/mean_conf": 0.366796875, "calib/mu_c": 0.37924528301886784, "calib/mu_w": 0.3463917525773196, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.000390625, "calib/std_conf": 0.08356727822679387, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1795.0, "completions/max_terminated_length": 1795.0, "completions/mean_length": 653.4296875, "completions/mean_terminated_length": 655.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.20266666666666666, "grad_norm": 0.015201202593743801, "learning_rate": 2.7777777777777776e-07, "loss": 0.0272, "num_tokens": 39740826.0, "reward": 1.5011475086212158, "reward_std": 0.15234965085983276, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7049218416213989, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.9150592088699341, "step": 190 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6745073108709473, "calib/avg_num_step_conf": 1.0703125, "calib/ece": 0.1964426877470356, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.06468531468531474, "calib/mean_conf": 0.3774703557312253, "calib/mu_c": 0.40559440559440557, "calib/mu_w": 0.34090909090909083, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.004347826086956521, "calib/std_conf": 0.09823704950740778, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2133.0, "completions/max_terminated_length": 2133.0, "completions/mean_length": 587.34375, "completions/mean_terminated_length": 589.6470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.20373333333333332, "grad_norm": 0.012601793743669987, "learning_rate": 2.5000000000000004e-07, "loss": 0.0447, "num_tokens": 39971034.0, "reward": 1.4446088075637817, "reward_std": 0.1962060034275055, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7324609756469727, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.893200695514679, "step": 191 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6367849153084052, "calib/avg_num_step_conf": 1.0546875, "calib/ece": 0.23011811023622056, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.048191115372323345, "calib/mean_conf": 0.37303149606299213, "calib/mu_c": 0.3929530201342281, "calib/mu_w": 0.34476190476190477, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.00826771653543307, "calib/std_conf": 0.09736217288446995, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2149.0, "completions/max_terminated_length": 2149.0, "completions/mean_length": 617.0859375, "completions/mean_terminated_length": 621.9448852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 164.0, "epoch": 0.2048, "grad_norm": 0.01687338761985302, "learning_rate": 2.2222222222222224e-07, "loss": 0.0049, "num_tokens": 40209664.0, "reward": 1.4559879302978516, "reward_std": 0.24735161662101746, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7122167944908142, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8948308229446411, "step": 192 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.564126942495574, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.2158730158730159, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.029060389482656823, "calib/mean_conf": 0.3936507936507936, "calib/mu_c": 0.40529801324503306, "calib/mu_w": 0.37623762376237624, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005158730158730159, "calib/std_conf": 0.08613945111882626, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1519.0, "completions/max_terminated_length": 1519.0, "completions/mean_length": 568.9140625, "completions/mean_terminated_length": 573.3936767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 0.20586666666666667, "grad_norm": 0.013111384585499763, "learning_rate": 1.9444444444444447e-07, "loss": 0.0052, "num_tokens": 40436698.0, "reward": 1.4665391445159912, "reward_std": 0.24395251274108887, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7128125429153442, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8842817544937134, "step": 193 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.622751124437781, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.17821522309711285, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04470264867566215, "calib/mean_conf": 0.3650918635170603, "calib/mu_c": 0.3855072463768116, "calib/mu_w": 0.34080459770114946, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.09950518591905787, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1691.0, "completions/max_terminated_length": 1691.0, "completions/mean_length": 594.44921875, "completions/mean_terminated_length": 596.7804565429688, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.20693333333333333, "grad_norm": 0.010096187703311443, "learning_rate": 1.6666666666666668e-07, "loss": 0.0033, "num_tokens": 40670501.0, "reward": 1.4261138439178467, "reward_std": 0.19851510226726532, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.723745584487915, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.9147768020629883, "step": 194 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5836825396825397, "calib/avg_num_step_conf": 1.02734375, "calib/ece": 0.21058823529411766, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032095238095237955, "calib/mean_conf": 0.38078431372549015, "calib/mu_c": 0.3939999999999999, "calib/mu_w": 0.36190476190476195, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0015686274509803923, "calib/std_conf": 0.09279627987081357, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 604.9453125, "completions/mean_terminated_length": 607.3176879882812, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.208, "grad_norm": 0.012461402453482151, "learning_rate": 1.3888888888888888e-07, "loss": 0.005, "num_tokens": 40907031.0, "reward": 1.4583547115325928, "reward_std": 0.2264489382505417, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7117577791213989, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8802154064178467, "step": 195 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5707578145521408, "calib/avg_num_step_conf": 1.0, "calib/ece": 0.21445312500000008, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.029012345679012397, "calib/mean_conf": 0.4183593750000001, "calib/mu_c": 0.42901234567901236, "calib/mu_w": 0.39999999999999997, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07815526757429325, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1503.0, "completions/max_terminated_length": 1503.0, "completions/mean_length": 479.5625, "completions/mean_terminated_length": 481.44317626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.20906666666666668, "grad_norm": 0.01966879330575466, "learning_rate": 1.1111111111111112e-07, "loss": -0.0071, "num_tokens": 41108023.0, "reward": 1.5250952243804932, "reward_std": 0.17813417315483093, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7290234565734863, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9110841751098633, "step": 196 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6216659446747942, "calib/avg_num_step_conf": 1.00390625, "calib/ece": 0.18750000000000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05338201621387456, "calib/mean_conf": 0.38203125000000004, "calib/mu_c": 0.4055944055944055, "calib/mu_w": 0.35221238938053095, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00546875, "calib/std_conf": 0.09390287015548299, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1963.0, "completions/max_terminated_length": 1963.0, "completions/mean_length": 636.0234375, "completions/mean_terminated_length": 638.5177001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.21013333333333334, "grad_norm": 0.011499550193548203, "learning_rate": 8.333333333333334e-08, "loss": -0.0092, "num_tokens": 41351581.0, "reward": 1.4608979225158691, "reward_std": 0.24005961418151855, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7397655844688416, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.9296857118606567, "step": 197 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5870192307692308, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.2122047244094489, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03142307692307689, "calib/mean_conf": 0.3791338582677165, "calib/mu_c": 0.39199999999999996, "calib/mu_w": 0.3605769230769231, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0003937007874015748, "calib/std_conf": 0.07835436987695427, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2624.0, "completions/max_terminated_length": 2624.0, "completions/mean_length": 575.80859375, "completions/mean_terminated_length": 578.0667114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.2112, "grad_norm": 0.00982653722167015, "learning_rate": 5.555555555555556e-08, "loss": 0.0295, "num_tokens": 41580052.0, "reward": 1.4629411697387695, "reward_std": 0.24547307193279266, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7108592987060547, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.9050459265708923, "step": 198 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5179347826086957, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.25515873015873014, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008016304347826186, "calib/mean_conf": 0.3876984126984127, "calib/mu_c": 0.390625, "calib/mu_w": 0.3826086956521738, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.003968253968253968, "calib/std_conf": 0.08845824679503829, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2545.0, "completions/max_terminated_length": 2545.0, "completions/mean_length": 639.46484375, "completions/mean_terminated_length": 641.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 197.0, "epoch": 0.21226666666666666, "grad_norm": 0.012449882924556732, "learning_rate": 2.777777777777778e-08, "loss": 0.0039, "num_tokens": 41823635.0, "reward": 1.4842878580093384, "reward_std": 0.2170136272907257, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6845703125, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8851981163024902, "step": 199 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.7157153729071537, "calib/avg_num_step_conf": 1.01953125, "calib/ece": 0.32964426877470365, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.07338660578386608, "calib/mean_conf": 0.39604743083003946, "calib/mu_c": 0.4172222222222222, "calib/mu_w": 0.34383561643835614, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.007114624505928857, "calib/std_conf": 0.07892677926720582, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 621.4921875, "completions/mean_terminated_length": 621.4921875, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 0.21333333333333335, "grad_norm": 0.013002941384911537, "learning_rate": 0.0, "loss": 0.009, "num_tokens": 42066465.0, "reward": 1.572641134262085, "reward_std": 0.18552997708320618, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7069531083106995, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8829084038734436, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.011818162081181072, "train_runtime": 8954.9185, "train_samples_per_second": 5.718, "train_steps_per_second": 0.022 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 42066465, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }