{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.5285714285714285, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.9285714285714286, "calib/gap": 0.02833333333333321, "calib/mean_conf": 0.9571428571428572, "calib/mu_c": 0.9733333333333333, "calib/mu_w": 0.9450000000000001, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.5285714285714285, "calib/std_conf": 0.033896601479156206, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2853.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 658.8203125, "completions/mean_terminated_length": 714.6525268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.09998760372400284, "learning_rate": 2.5000000000000004e-07, "loss": 0.0145, "num_tokens": 276242.0, "reward": 0.07658073306083679, "reward_std": 0.14345498383045197, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.024793751537799835, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.03152916580438614, "step": 1 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.1851851851851852, "calib/avg_num_step_conf": 0.24609375, "calib/ece": 0.2141666666666665, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.01666666666666672, "calib/mean_conf": 0.9641666666666665, "calib/mu_c": 0.9599999999999999, "calib/mu_w": 0.9766666666666666, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.2141666666666665, "calib/std_conf": 0.014409680388158833, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2927.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 749.54296875, "completions/mean_terminated_length": 820.0128784179688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.0952862948179245, "learning_rate": 5.000000000000001e-07, "loss": -0.0143, "num_tokens": 571413.0, "reward": 0.09923964738845825, "reward_std": 0.2225067913532257, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.03563320264220238, "rewards/format_reward_step": 0.046875, "rewards/stepwise_brier_reward": 0.03320039063692093, "step": 2 }, { "calib/answer_extract_rate": 0.02734375, "calib/auroc": 0.4444444444444445, "calib/avg_num_step_conf": 0.19140625, "calib/ece": 0.46833333333333327, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0033333333333334103, "calib/mean_conf": 0.9683333333333333, "calib/mu_c": 0.9666666666666667, "calib/mu_w": 0.9700000000000001, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.04296875, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.46833333333333327, "calib/std_conf": 0.01343709624716426, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2791.0, "completions/max_terminated_length": 2791.0, "completions/mean_length": 676.359375, "completions/mean_terminated_length": 736.7999877929688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.05166647955775261, "learning_rate": 7.5e-07, "loss": -0.0104, "num_tokens": 849817.0, "reward": 0.03318578749895096, "reward_std": 0.08771081268787384, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.012013280764222145, "rewards/format_reward_step": 0.01953125, "rewards/stepwise_brier_reward": 0.011354869231581688, "step": 3 }, { "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.5857142857142856, "calib/avg_num_step_conf": 0.29296875, "calib/ece": 0.3483333333333334, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": 0.04057142857142848, "calib/mean_conf": 0.9316666666666666, "calib/mu_c": 0.9485714285714285, "calib/mu_w": 0.908, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.3483333333333334, "calib/std_conf": 0.07525881269917091, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 731.8671875, "completions/mean_terminated_length": 836.419677734375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.004266666666666667, "grad_norm": 0.015893759205937386, "learning_rate": 1.0000000000000002e-06, "loss": 0.0075, "num_tokens": 1143343.0, "reward": 0.0737013965845108, "reward_std": 0.1637091338634491, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.026279686018824577, "rewards/format_reward_step": 0.0390625, "rewards/stepwise_brier_reward": 0.026338398456573486, "step": 4 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.375, "calib/avg_num_step_conf": 0.125, "calib/ece": 0.5357142857142858, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.007499999999999951, "calib/mean_conf": 0.9642857142857143, "calib/mu_c": 0.96, "calib/mu_w": 0.9674999999999999, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.03125, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.5357142857142858, "calib/std_conf": 0.019897697538834472, "calib/step_conf_rate": 0.02734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 741.08984375, "completions/mean_terminated_length": 821.2943725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.011378278955817223, "learning_rate": 1.25e-06, "loss": -0.0188, "num_tokens": 1439750.0, "reward": 0.03778243437409401, "reward_std": 0.1001213788986206, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.012688672170042992, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.013441067188978195, "step": 5 }, { "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.4571428571428572, "calib/avg_num_step_conf": 0.1875, "calib/ece": 0.5433333333333332, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": -0.013714285714285679, "calib/mean_conf": 0.96, "calib/mu_c": 0.952, "calib/mu_w": 0.9657142857142856, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.5433333333333332, "calib/std_conf": 0.03135814620371129, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2990.0, "completions/max_terminated_length": 2990.0, "completions/mean_length": 626.26953125, "completions/mean_terminated_length": 691.0560302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.016327349469065666, "learning_rate": 1.5e-06, "loss": -0.0036, "num_tokens": 1706027.0, "reward": 0.05387546867132187, "reward_std": 0.10294744372367859, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.016544923186302185, "rewards/format_reward_step": 0.03125, "rewards/stepwise_brier_reward": 0.01926945522427559, "step": 6 }, { "calib/answer_extract_rate": 0.05078125, "calib/auroc": 0.32, "calib/avg_num_step_conf": 0.26171875, "calib/ece": 0.594, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": 0.04599999999999993, "calib/mean_conf": 0.9273333333333333, "calib/mu_c": 0.9579999999999999, "calib/mu_w": 0.9119999999999999, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.0546875, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.594, "calib/std_conf": 0.1610576156397317, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 796.58203125, "completions/mean_terminated_length": 871.4744262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.014986117370426655, "learning_rate": 1.75e-06, "loss": 0.0125, "num_tokens": 2017376.0, "reward": 0.06252811849117279, "reward_std": 0.15057691931724548, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.02071210741996765, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.026275351643562317, "step": 7 }, { "calib/answer_extract_rate": 0.046875, "calib/auroc": 0.4, "calib/avg_num_step_conf": 0.1328125, "calib/ece": 0.4640000000000001, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": -0.0040000000000000036, "calib/mean_conf": 0.9640000000000001, "calib/mu_c": 0.962, "calib/mu_w": 0.966, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.4640000000000001, "calib/std_conf": 0.02870540018881465, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 675.36328125, "completions/mean_terminated_length": 720.3875122070312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.009805521927773952, "learning_rate": 2.0000000000000003e-06, "loss": 0.0075, "num_tokens": 2296781.0, "reward": 0.053026750683784485, "reward_std": 0.12672922015190125, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.02003437466919422, "rewards/format_reward_step": 0.02734375, "rewards/stepwise_brier_reward": 0.02019762247800827, "step": 8 }, { "calib/answer_extract_rate": 0.03125, "calib/auroc": 0.16666666666666663, "calib/avg_num_step_conf": 0.12890625, "calib/ece": 0.5988888888888889, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": -0.03833333333333333, "calib/mean_conf": 0.9322222222222223, "calib/mu_c": 0.9066666666666666, "calib/mu_w": 0.945, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.0390625, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.5988888888888889, "calib/std_conf": 0.06196374886043859, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2990.0, "completions/max_terminated_length": 2990.0, "completions/mean_length": 663.30859375, "completions/mean_terminated_length": 738.291259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0096, "grad_norm": 0.045146096497774124, "learning_rate": 2.25e-06, "loss": 0.0096, "num_tokens": 2574124.0, "reward": 0.03620094433426857, "reward_std": 0.09545932710170746, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.013463281095027924, "rewards/format_reward_step": 0.0234375, "rewards/stepwise_brier_reward": 0.014152996242046356, "step": 9 }, { "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.5625, "calib/avg_num_step_conf": 0.21484375, "calib/ece": 0.7539999999999999, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": 0.020000000000000018, "calib/mean_conf": 0.954, "calib/mu_c": 0.97, "calib/mu_w": 0.95, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.7539999999999999, "calib/std_conf": 0.03611094017053558, "calib/step_conf_rate": 0.0390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 685.203125, "completions/mean_terminated_length": 776.1593017578125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.010666666666666666, "grad_norm": 0.008990222588181496, "learning_rate": 2.5e-06, "loss": -0.0291, "num_tokens": 2856336.0, "reward": 0.033995941281318665, "reward_std": 0.07464214414358139, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.009489063173532486, "rewards/format_reward_step": 0.03125, "rewards/stepwise_brier_reward": 0.01711970567703247, "step": 10 }, { "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.4761904761904762, "calib/avg_num_step_conf": 0.23828125, "calib/ece": 0.6400000000000001, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.009523809523809601, "calib/mean_conf": 0.9400000000000001, "calib/mu_c": 0.9466666666666667, "calib/mu_w": 0.937142857142857, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.6400000000000001, "calib/std_conf": 0.0679705818718657, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12109375, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 726.953125, "completions/mean_terminated_length": 827.1111450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.011733333333333333, "grad_norm": 0.015153302811086178, "learning_rate": 2.7500000000000004e-06, "loss": 0.0004, "num_tokens": 3146916.0, "reward": 0.04430060833692551, "reward_std": 0.1161235123872757, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01468046847730875, "rewards/format_reward_step": 0.03515625, "rewards/stepwise_brier_reward": 0.021896956488490105, "step": 11 }, { "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.47727272727272724, "calib/avg_num_step_conf": 0.3828125, "calib/ece": 0.6117647058823528, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0021212121212120794, "calib/mean_conf": 0.9647058823529411, "calib/mu_c": 0.9633333333333333, "calib/mu_w": 0.9654545454545453, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6117647058823528, "calib/std_conf": 0.020896846668625433, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 675.05859375, "completions/mean_terminated_length": 735.3829345703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.014090793207287788, "learning_rate": 3e-06, "loss": 0.0051, "num_tokens": 3423907.0, "reward": 0.07500467449426651, "reward_std": 0.17258451879024506, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.021091407164931297, "rewards/format_reward_step": 0.04296875, "rewards/stepwise_brier_reward": 0.028927277773618698, "step": 12 }, { "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.5303030303030303, "calib/avg_num_step_conf": 0.3984375, "calib/ece": 0.3279411764705882, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0038636363636362914, "calib/mean_conf": 0.975, "calib/mu_c": 0.9763636363636364, "calib/mu_w": 0.9725000000000001, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.3279411764705882, "calib/std_conf": 0.01680336100833613, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 767.99609375, "completions/mean_terminated_length": 843.806884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.01971305161714554, "learning_rate": 3.2500000000000002e-06, "loss": -0.0259, "num_tokens": 3725106.0, "reward": 0.10816285759210587, "reward_std": 0.20174898207187653, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.03248114883899689, "rewards/format_reward_step": 0.0546875, "rewards/stepwise_brier_reward": 0.03298277035355568, "step": 13 }, { "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.6192307692307693, "calib/avg_num_step_conf": 0.4375, "calib/ece": 0.5260869565217392, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.9565217391304348, "calib/gap": 0.009076923076922983, "calib/mean_conf": 0.9608695652173914, "calib/mu_c": 0.966, "calib/mu_w": 0.956923076923077, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.5260869565217392, "calib/std_conf": 0.025179882148817844, "calib/step_conf_rate": 0.08984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 835.2421875, "completions/mean_terminated_length": 906.0254516601562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.020166773349046707, "learning_rate": 3.5e-06, "loss": -0.0202, "num_tokens": 4044328.0, "reward": 0.11651969701051712, "reward_std": 0.2357882559299469, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.031532421708106995, "rewards/format_reward_step": 0.07421875, "rewards/stepwise_brier_reward": 0.05173388123512268, "step": 14 }, { "calib/answer_extract_rate": 0.12109375, "calib/auroc": 0.45714285714285713, "calib/avg_num_step_conf": 0.6484375, "calib/ece": 0.456896551724138, "calib/final_conf_rate": 0.11328125, "calib/format_rate": 0.09765625, "calib/frac_conf_gt_0.9": 0.896551724137931, "calib/gap": 0.03795238095238118, "calib/mean_conf": 0.9396551724137929, "calib/mu_c": 0.9592857142857144, "calib/mu_w": 0.9213333333333332, "calib/nonempty_final_conf_rate": 0.11328125, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.456896551724138, "calib/std_conf": 0.12411015014815611, "calib/step_conf_rate": 0.1171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 692.48828125, "completions/mean_terminated_length": 760.8455200195312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.021689649671316147, "learning_rate": 3.7500000000000005e-06, "loss": -0.0017, "num_tokens": 4329485.0, "reward": 0.16236014664173126, "reward_std": 0.3670760989189148, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.06107109412550926, "rewards/format_reward_step": 0.09765625, "rewards/stepwise_brier_reward": 0.06493200361728668, "step": 15 }, { "calib/answer_extract_rate": 0.15625, "calib/auroc": 0.4028132992327365, "calib/avg_num_step_conf": 0.91015625, "calib/ece": 0.5319749999999999, "calib/final_conf_rate": 0.15625, "calib/format_rate": 0.1171875, "calib/frac_conf_gt_0.9": 0.825, "calib/gap": -0.007117647058823562, "calib/mean_conf": 0.9569749999999999, "calib/mu_c": 0.9528823529411764, "calib/mu_w": 0.96, "calib/nonempty_final_conf_rate": 0.15625, "calib/nonempty_reasoning_rate": 0.16796875, "calib/nonempty_step_conf_rate": 0.140625, "calib/pce": 0.5319749999999999, "calib/std_conf": 0.03168397662857362, "calib/step_conf_rate": 0.140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3020.0, "completions/max_terminated_length": 3020.0, "completions/mean_length": 816.5859375, "completions/mean_terminated_length": 901.0603637695312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.014984040521085262, "learning_rate": 4.000000000000001e-06, "loss": -0.0152, "num_tokens": 4647379.0, "reward": 0.19561436772346497, "reward_std": 0.37104684114456177, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.061235152184963226, "rewards/format_reward_step": 0.1171875, "rewards/stepwise_brier_reward": 0.08840983361005783, "step": 16 }, { "calib/answer_extract_rate": 0.25, "calib/auroc": 0.4502032520325203, "calib/avg_num_step_conf": 1.46484375, "calib/ece": 0.32492307692307676, "calib/final_conf_rate": 0.25390625, "calib/format_rate": 0.2109375, "calib/frac_conf_gt_0.9": 0.8153846153846154, "calib/gap": -0.0071341463414634765, "calib/mean_conf": 0.948, "calib/mu_c": 0.9453658536585365, "calib/mu_w": 0.9525, "calib/nonempty_final_conf_rate": 0.25390625, "calib/nonempty_reasoning_rate": 0.26953125, "calib/nonempty_step_conf_rate": 0.24609375, "calib/pce": 0.3210769230769229, "calib/std_conf": 0.04184954737371259, "calib/step_conf_rate": 0.24609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2723.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 699.859375, "completions/mean_terminated_length": 731.2816162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.018133333333333335, "grad_norm": 0.025489211082458496, "learning_rate": 4.25e-06, "loss": 0.0229, "num_tokens": 4930071.0, "reward": 0.4290560483932495, "reward_std": 0.5249615907669067, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.15336796641349792, "rewards/format_reward_step": 0.2109375, "rewards/stepwise_brier_reward": 0.15660616755485535, "step": 17 }, { "calib/answer_extract_rate": 0.328125, "calib/auroc": 0.4468085106382979, "calib/avg_num_step_conf": 2.16796875, "calib/ece": 0.3682499999999999, "calib/final_conf_rate": 0.3125, "calib/format_rate": 0.27734375, "calib/frac_conf_gt_0.9": 0.8875, "calib/gap": 0.002050290135396593, "calib/mean_conf": 0.9557500000000001, "calib/mu_c": 0.9565957446808512, "calib/mu_w": 0.9545454545454546, "calib/nonempty_final_conf_rate": 0.3125, "calib/nonempty_reasoning_rate": 0.359375, "calib/nonempty_step_conf_rate": 0.3203125, "calib/pce": 0.3682499999999999, "calib/std_conf": 0.04366849550877611, "calib/step_conf_rate": 0.3203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 676.234375, "completions/mean_terminated_length": 724.334716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.0192, "grad_norm": 0.037937141954898834, "learning_rate": 4.5e-06, "loss": -0.0288, "num_tokens": 5213907.0, "reward": 0.5089144110679626, "reward_std": 0.6876594424247742, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.17252811789512634, "rewards/format_reward_step": 0.27734375, "rewards/stepwise_brier_reward": 0.18344208598136902, "step": 18 }, { "calib/answer_extract_rate": 0.66796875, "calib/auroc": 0.5192307692307692, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.48286144578313256, "calib/final_conf_rate": 0.6484375, "calib/format_rate": 0.578125, "calib/frac_conf_gt_0.9": 0.9156626506024096, "calib/gap": -0.002026515151515196, "calib/mean_conf": 0.9527409638554215, "calib/mu_c": 0.9516666666666665, "calib/mu_w": 0.9536931818181817, "calib/nonempty_final_conf_rate": 0.6484375, "calib/nonempty_reasoning_rate": 0.7265625, "calib/nonempty_step_conf_rate": 0.66796875, "calib/pce": 0.48286144578313256, "calib/std_conf": 0.037823134809026944, "calib/step_conf_rate": 0.66796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 550.953125, "completions/mean_terminated_length": 566.4417724609375, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.020266666666666665, "grad_norm": 0.03861883282661438, "learning_rate": 4.75e-06, "loss": -0.0044, "num_tokens": 5459711.0, "reward": 0.9206970930099487, "reward_std": 0.7485260367393494, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.3020592927932739, "rewards/format_reward_step": 0.578125, "rewards/stepwise_brier_reward": 0.3963542580604553, "step": 19 }, { "calib/answer_extract_rate": 0.88671875, "calib/auroc": 0.4868912337662337, "calib/avg_num_step_conf": 6.06640625, "calib/ece": 0.4664549549549549, "calib/final_conf_rate": 0.8671875, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.9234234234234234, "calib/gap": -0.004156655844156032, "calib/mean_conf": 0.9558243243243243, "calib/mu_c": 0.9537272727272725, "calib/mu_w": 0.9578839285714286, "calib/nonempty_final_conf_rate": 0.8671875, "calib/nonempty_reasoning_rate": 0.94921875, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.46339189189189184, "calib/std_conf": 0.04347548489327777, "calib/step_conf_rate": 0.91796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2736.0, "completions/max_terminated_length": 2736.0, "completions/mean_length": 461.83984375, "completions/mean_terminated_length": 467.31622314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.021333333333333333, "grad_norm": 0.021412760019302368, "learning_rate": 5e-06, "loss": -0.0089, "num_tokens": 5682814.0, "reward": 1.3245768547058105, "reward_std": 0.7944626808166504, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.45545026659965515, "rewards/format_reward_step": 0.8203125, "rewards/stepwise_brier_reward": 0.5772321224212646, "step": 20 }, { "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.5046546546546546, "calib/avg_num_step_conf": 6.984375, "calib/ece": 0.4655580086580087, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.859375, "calib/frac_conf_gt_0.9": 0.8831168831168831, "calib/gap": 0.009543468468468652, "calib/mean_conf": 0.9460774891774892, "calib/mu_c": 0.9510351351351353, "calib/mu_w": 0.9414916666666666, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.4655580086580087, "calib/std_conf": 0.08062289489701147, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 496.0390625, "completions/mean_terminated_length": 503.9127197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 40.0, "epoch": 0.0224, "grad_norm": 0.025596238672733307, "learning_rate": 4.9722222222222224e-06, "loss": -0.03, "num_tokens": 5912760.0, "reward": 1.365824818611145, "reward_std": 0.7561129331588745, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.46789538860321045, "rewards/format_reward_step": 0.859375, "rewards/stepwise_brier_reward": 0.6047787666320801, "step": 21 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5091642228739003, "calib/avg_num_step_conf": 7.29296875, "calib/ece": 0.4596489795918367, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.8775510204081632, "calib/gap": 0.0003416422287390031, "calib/mean_conf": 0.9492816326530612, "calib/mu_c": 0.9494545454545454, "calib/mu_w": 0.9491129032258064, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4575265306122449, "calib/std_conf": 0.046251290874293714, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2794.0, "completions/max_terminated_length": 2794.0, "completions/mean_length": 480.875, "completions/mean_terminated_length": 486.57708740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.023466666666666667, "grad_norm": 0.14732640981674194, "learning_rate": 4.944444444444445e-06, "loss": 0.019, "num_tokens": 6137680.0, "reward": 1.470207691192627, "reward_std": 0.742310643196106, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5073757171630859, "rewards/format_reward_step": 0.93359375, "rewards/stepwise_brier_reward": 0.6703301668167114, "step": 22 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4517401992495796, "calib/avg_num_step_conf": 7.375, "calib/ece": 0.42261044176706813, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.8674698795180723, "calib/gap": -0.008777979039979233, "calib/mean_conf": 0.946144578313253, "calib/mu_c": 0.9419847328244275, "calib/mu_w": 0.9507627118644068, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.42132530120481915, "calib/std_conf": 0.04665083474335586, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2701.0, "completions/max_terminated_length": 2701.0, "completions/mean_length": 478.09765625, "completions/mean_terminated_length": 483.7668151855469, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.024533333333333334, "grad_norm": 0.012211649678647518, "learning_rate": 4.9166666666666665e-06, "loss": 0.0314, "num_tokens": 6364009.0, "reward": 1.54275381565094, "reward_std": 0.727489173412323, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5397515296936035, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.6625137329101562, "step": 23 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.46217724583109204, "calib/avg_num_step_conf": 7.7421875, "calib/ece": 0.5186194331983806, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.8218623481781376, "calib/gap": -0.007405594405594518, "calib/mean_conf": 0.9396720647773279, "calib/mu_c": 0.9353846153846154, "calib/mu_w": 0.9427902097902099, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.5186194331983806, "calib/std_conf": 0.04596488878480891, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2213.0, "completions/max_terminated_length": 2213.0, "completions/mean_length": 493.14453125, "completions/mean_terminated_length": 497.0275573730469, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.0256, "grad_norm": 0.010158343240618706, "learning_rate": 4.888888888888889e-06, "loss": -0.008, "num_tokens": 6594766.0, "reward": 1.3534126281738281, "reward_std": 0.7272195816040039, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.4507281184196472, "rewards/format_reward_step": 0.94140625, "rewards/stepwise_brier_reward": 0.6191724538803101, "step": 24 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5088761174968072, "calib/avg_num_step_conf": 7.953125, "calib/ece": 0.3985657370517929, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8127490039840638, "calib/gap": 0.0011871008939976502, "calib/mean_conf": 0.936414342629482, "calib/mu_c": 0.936962962962963, "calib/mu_w": 0.9357758620689653, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3985657370517929, "calib/std_conf": 0.04696242917959356, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2319.0, "completions/max_terminated_length": 2319.0, "completions/mean_length": 464.69921875, "completions/mean_terminated_length": 470.2095031738281, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.02666666666666667, "grad_norm": 0.01881440542638302, "learning_rate": 4.861111111111111e-06, "loss": 0.0143, "num_tokens": 6816953.0, "reward": 1.5973844528198242, "reward_std": 0.6234603524208069, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5789656043052673, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6933847665786743, "step": 25 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.40984695168226665, "calib/avg_num_step_conf": 7.50390625, "calib/ece": 0.39682539682539697, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.7261904761904762, "calib/gap": -0.015529977232481729, "calib/mean_conf": 0.9255555555555556, "calib/mu_c": 0.9182835820895522, "calib/mu_w": 0.9338135593220339, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.39531746031746046, "calib/std_conf": 0.05454800646053292, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2068.0, "completions/max_terminated_length": 2068.0, "completions/mean_length": 466.38671875, "completions/mean_terminated_length": 470.0590515136719, "completions/min_length": 0.0, "completions/min_terminated_length": 267.0, "epoch": 0.027733333333333332, "grad_norm": 0.009599103592336178, "learning_rate": 4.833333333333333e-06, "loss": 0.0171, "num_tokens": 7041588.0, "reward": 1.5898265838623047, "reward_std": 0.6190015077590942, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5721874833106995, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6855560541152954, "step": 26 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4757980833791125, "calib/avg_num_step_conf": 7.49609375, "calib/ece": 0.41597656250000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.62109375, "calib/gap": -0.006178966001343045, "calib/mean_conf": 0.9095703125000001, "calib/mu_c": 0.9064566929133857, "calib/mu_w": 0.9126356589147288, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41472656250000006, "calib/std_conf": 0.0550373713366867, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1052.0, "completions/max_terminated_length": 1052.0, "completions/mean_length": 463.4453125, "completions/mean_terminated_length": 470.8016052246094, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 0.0288, "grad_norm": 0.012968444265425205, "learning_rate": 4.805555555555556e-06, "loss": -0.0082, "num_tokens": 7265446.0, "reward": 1.5642105340957642, "reward_std": 0.6272684335708618, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5723339915275574, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.715758204460144, "step": 27 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4433365917236885, "calib/avg_num_step_conf": 6.921875, "calib/ece": 0.2807086614173228, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.5, "calib/gap": -0.006353861192570864, "calib/mean_conf": 0.8876377952755905, "calib/mu_c": 0.8851612903225807, "calib/mu_w": 0.8915151515151516, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2790551181102362, "calib/std_conf": 0.07656973243944275, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1428.0, "completions/max_terminated_length": 1428.0, "completions/mean_length": 459.84765625, "completions/mean_terminated_length": 467.1468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.029866666666666666, "grad_norm": 0.012692811898887157, "learning_rate": 4.777777777777778e-06, "loss": -0.0264, "num_tokens": 7490111.0, "reward": 1.7726266384124756, "reward_std": 0.607335090637207, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6710312962532043, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7788500785827637, "step": 28 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4182775178527456, "calib/avg_num_step_conf": 7.19140625, "calib/ece": 0.3746274509803921, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4666666666666667, "calib/gap": -0.01619859640482635, "calib/mean_conf": 0.8848235294117647, "calib/mu_c": 0.8769465648854962, "calib/mu_w": 0.8931451612903225, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3728627450980392, "calib/std_conf": 0.07264652694891428, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1312.0, "completions/max_terminated_length": 1312.0, "completions/mean_length": 498.39453125, "completions/mean_terminated_length": 506.3055725097656, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.030933333333333334, "grad_norm": 0.00959760695695877, "learning_rate": 4.75e-06, "loss": -0.0374, "num_tokens": 7724828.0, "reward": 1.5990973711013794, "reward_std": 0.5396340489387512, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5891090035438538, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.760405421257019, "step": 29 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4417037507946599, "calib/avg_num_step_conf": 6.20703125, "calib/ece": 0.3761354581673306, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3426294820717131, "calib/gap": -0.017837253655435648, "calib/mean_conf": 0.8504780876494024, "calib/mu_c": 0.8412396694214875, "calib/mu_w": 0.8590769230769232, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3722709163346613, "calib/std_conf": 0.09256294573013182, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1832.0, "completions/max_terminated_length": 1832.0, "completions/mean_length": 501.0, "completions/mean_terminated_length": 510.9801025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.032, "grad_norm": 0.010969402268528938, "learning_rate": 4.722222222222222e-06, "loss": -0.018, "num_tokens": 7960068.0, "reward": 1.525612711906433, "reward_std": 0.6004476547241211, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5841984152793884, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7370023727416992, "step": 30 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4944075063692289, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.33633858267716543, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.14566929133858267, "calib/gap": 0.0018871559062948462, "calib/mean_conf": 0.8090944881889763, "calib/mu_c": 0.8100826446280993, "calib/mu_w": 0.8081954887218045, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33452755905511816, "calib/std_conf": 0.08994512177016528, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 493.3203125, "completions/mean_terminated_length": 499.16998291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.03306666666666667, "grad_norm": 0.009395385161042213, "learning_rate": 4.694444444444445e-06, "loss": 0.0088, "num_tokens": 8192270.0, "reward": 1.5579791069030762, "reward_std": 0.4944719076156616, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6239476203918457, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7954689264297485, "step": 31 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4968307692307692, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.24705882352941166, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07450980392156863, "calib/gap": -0.007018461538461529, "calib/mean_conf": 0.7509019607843138, "calib/mu_c": 0.7474615384615384, "calib/mu_w": 0.7544799999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24407843137254892, "calib/std_conf": 0.10591695876023853, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 465.46875, "completions/mean_terminated_length": 472.857177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.034133333333333335, "grad_norm": 0.01071823202073574, "learning_rate": 4.666666666666667e-06, "loss": -0.016, "num_tokens": 8418134.0, "reward": 1.6352819204330444, "reward_std": 0.4783379137516022, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6744414567947388, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8354364633560181, "step": 32 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4899872854418309, "calib/avg_num_step_conf": 5.52734375, "calib/ece": 0.16980237154150202, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05928853754940711, "calib/gap": -0.007132867132867204, "calib/mean_conf": 0.7214229249011856, "calib/mu_c": 0.7183216783216783, "calib/mu_w": 0.7254545454545455, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16300395256917, "calib/std_conf": 0.12191631528612948, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 514.33203125, "completions/mean_terminated_length": 518.3818969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.0352, "grad_norm": 0.008350872434675694, "learning_rate": 4.638888888888889e-06, "loss": 0.0144, "num_tokens": 8656675.0, "reward": 1.7188265323638916, "reward_std": 0.4865078330039978, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6994839906692505, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8555097579956055, "step": 33 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47640522875816993, "calib/avg_num_step_conf": 5.54296875, "calib/ece": 0.14896825396825403, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": -0.011184313725490336, "calib/mean_conf": 0.6857936507936507, "calib/mu_c": 0.6812666666666666, "calib/mu_w": 0.6924509803921569, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11976190476190483, "calib/std_conf": 0.12821641659123492, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2431.0, "completions/max_terminated_length": 2431.0, "completions/mean_length": 497.8046875, "completions/mean_terminated_length": 499.75689697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.03626666666666667, "grad_norm": 0.009573661722242832, "learning_rate": 4.611111111111112e-06, "loss": 0.0059, "num_tokens": 8889225.0, "reward": 1.7634197473526, "reward_std": 0.5686565637588501, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.715394139289856, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8617225289344788, "step": 34 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4415547208251098, "calib/avg_num_step_conf": 5.40625, "calib/ece": 0.18134920634920634, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": -0.02662634494174576, "calib/mean_conf": 0.7015079365079365, "calib/mu_c": 0.689568345323741, "calib/mu_w": 0.7161946902654868, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16563492063492066, "calib/std_conf": 0.12104746189924816, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 605.14453125, "completions/mean_terminated_length": 609.909423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.037333333333333336, "grad_norm": 0.008835054002702236, "learning_rate": 4.583333333333333e-06, "loss": 0.0053, "num_tokens": 9153398.0, "reward": 1.686740756034851, "reward_std": 0.3833616077899933, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.688662052154541, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8395507335662842, "step": 35 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.501685855263158, "calib/avg_num_step_conf": 4.85546875, "calib/ece": 0.09594488188976376, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.003531249999999986, "calib/mean_conf": 0.7131102362204724, "calib/mu_c": 0.714, "calib/mu_w": 0.71046875, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.030511811023622007, "calib/std_conf": 0.10194646799259242, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 483.80859375, "completions/mean_terminated_length": 487.61810302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.0384, "grad_norm": 0.016380857676267624, "learning_rate": 4.555555555555556e-06, "loss": 0.0072, "num_tokens": 9379965.0, "reward": 2.017597198486328, "reward_std": 0.48127132654190063, "rewards/accuracy_reward_step": 0.7421875, "rewards/final_brier_reward_step": 0.7909355759620667, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8575782775878906, "step": 36 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4459050564819796, "calib/avg_num_step_conf": 4.66015625, "calib/ece": 0.1722672064777328, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.032388663967611336, "calib/gap": -0.0178321678321679, "calib/mean_conf": 0.7465991902834008, "calib/mu_c": 0.739090909090909, "calib/mu_w": 0.7569230769230769, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.16995951417004052, "calib/std_conf": 0.09547544186514331, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2609.0, "completions/max_terminated_length": 2609.0, "completions/mean_length": 575.828125, "completions/mean_terminated_length": 580.3621826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.039466666666666664, "grad_norm": 0.010821904055774212, "learning_rate": 4.527777777777778e-06, "loss": 0.0247, "num_tokens": 9634473.0, "reward": 1.682852864265442, "reward_std": 0.38647031784057617, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6793968677520752, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.8020142912864685, "step": 37 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5007050765511684, "calib/avg_num_step_conf": 4.59375, "calib/ece": 0.20439516129032248, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.04435483870967742, "calib/gap": -0.001553854418479772, "calib/mean_conf": 0.7931048387096774, "calib/mu_c": 0.7924657534246574, "calib/mu_w": 0.7940196078431372, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20439516129032248, "calib/std_conf": 0.07716523102934827, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2516.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 612.84375, "completions/mean_terminated_length": 617.6693115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 235.0, "epoch": 0.04053333333333333, "grad_norm": 0.009312131442129612, "learning_rate": 4.5e-06, "loss": 0.0488, "num_tokens": 9898249.0, "reward": 1.7124087810516357, "reward_std": 0.5167993307113647, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6861327886581421, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.8119394779205322, "step": 38 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5022058823529412, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.2915936254980079, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.09561752988047809, "calib/gap": -0.0007378516624041431, "calib/mean_conf": 0.8334262948207172, "calib/mu_c": 0.8330882352941176, "calib/mu_w": 0.8338260869565217, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2915936254980079, "calib/std_conf": 0.06562155736230371, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 580.625, "completions/mean_terminated_length": 580.625, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 0.0416, "grad_norm": 0.008780171163380146, "learning_rate": 4.472222222222223e-06, "loss": 0.0244, "num_tokens": 10152977.0, "reward": 1.6495329141616821, "reward_std": 0.5215615034103394, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6453015804290771, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8122050166130066, "step": 39 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5291847109772171, "calib/avg_num_step_conf": 4.4375, "calib/ece": 0.3016535433070866, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.14173228346456693, "calib/gap": 0.00837256009539955, "calib/mean_conf": 0.8567716535433071, "calib/mu_c": 0.8604964539007092, "calib/mu_w": 0.8521238938053096, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3016535433070866, "calib/std_conf": 0.0547130220973908, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2471.0, "completions/max_terminated_length": 2471.0, "completions/mean_length": 577.18359375, "completions/mean_terminated_length": 581.7283325195312, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.042666666666666665, "grad_norm": 0.009869641624391079, "learning_rate": 4.444444444444444e-06, "loss": 0.0026, "num_tokens": 10407496.0, "reward": 1.6799644231796265, "reward_std": 0.5000669360160828, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6576230525970459, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7809846997261047, "step": 40 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5991044776119404, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.0840637450199204, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3346613545816733, "calib/gap": 0.019556218905472544, "calib/mean_conf": 0.8848605577689241, "calib/mu_c": 0.8887562189054725, "calib/mu_w": 0.8692, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0840637450199204, "calib/std_conf": 0.05505774294189895, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 529.984375, "completions/mean_terminated_length": 538.3968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.04373333333333333, "grad_norm": 0.00941492710262537, "learning_rate": 4.416666666666667e-06, "loss": -0.0049, "num_tokens": 10650420.0, "reward": 2.0804450511932373, "reward_std": 0.3655003011226654, "rewards/accuracy_reward_step": 0.78515625, "rewards/final_brier_reward_step": 0.8202797174453735, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.8296257257461548, "step": 41 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.538619390432618, "calib/avg_num_step_conf": 4.51953125, "calib/ece": 0.251328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.4453125, "calib/gap": 0.0070066608356322835, "calib/mean_conf": 0.903671875, "calib/mu_c": 0.906107784431138, "calib/mu_w": 0.8991011235955058, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.251328125, "calib/std_conf": 0.04970021211206621, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 913.0, "completions/max_terminated_length": 913.0, "completions/mean_length": 427.34765625, "completions/mean_terminated_length": 434.1309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.0448, "grad_norm": 0.010592802427709103, "learning_rate": 4.388888888888889e-06, "loss": -0.0141, "num_tokens": 10864189.0, "reward": 1.8587779998779297, "reward_std": 0.3515624403953552, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7107508182525635, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8102985620498657, "step": 42 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6028612531969308, "calib/avg_num_step_conf": 4.51171875, "calib/ece": 0.18884920634920657, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.5674603174603174, "calib/gap": 0.019990409207160997, "calib/mean_conf": 0.9190079365079366, "calib/mu_c": 0.9244021739130434, "calib/mu_w": 0.9044117647058824, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18884920634920657, "calib/std_conf": 0.04412518196045866, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 521.09375, "completions/mean_terminated_length": 527.2727661132812, "completions/min_length": 0.0, "completions/min_terminated_length": 249.0, "epoch": 0.04586666666666667, "grad_norm": 0.010173147544264793, "learning_rate": 4.361111111111112e-06, "loss": -0.0125, "num_tokens": 11102813.0, "reward": 1.9611314535140991, "reward_std": 0.4487866759300232, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7611573934555054, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8021184206008911, "step": 43 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5419849758085052, "calib/avg_num_step_conf": 4.44140625, "calib/ece": 0.400199203187251, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5657370517928287, "calib/gap": 0.005902724726254083, "calib/mean_conf": 0.9193227091633466, "calib/mu_c": 0.9221212121212122, "calib/mu_w": 0.9162184873949581, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3968127490039841, "calib/std_conf": 0.05360267210135566, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2558.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 584.58984375, "completions/mean_terminated_length": 586.8823852539062, "completions/min_length": 0.0, "completions/min_terminated_length": 254.0, "epoch": 0.046933333333333334, "grad_norm": 0.008310251869261265, "learning_rate": 4.333333333333334e-06, "loss": 0.009, "num_tokens": 11358788.0, "reward": 1.6001975536346436, "reward_std": 0.4775276780128479, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5843167304992676, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7617859244346619, "step": 44 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5925801238890386, "calib/avg_num_step_conf": 4.453125, "calib/ece": 0.30039682539682533, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6190476190476191, "calib/gap": 0.015844330729868017, "calib/mean_conf": 0.9273809523809523, "calib/mu_c": 0.9332911392405062, "calib/mu_w": 0.9174468085106382, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30039682539682533, "calib/std_conf": 0.04292256910735317, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 540.63671875, "completions/mean_terminated_length": 544.8936767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.048, "grad_norm": 0.01107387151569128, "learning_rate": 4.305555555555556e-06, "loss": 0.002, "num_tokens": 11602239.0, "reward": 1.7854273319244385, "reward_std": 0.4884273111820221, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6708077788352966, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7990267872810364, "step": 45 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5024832301341589, "calib/avg_num_step_conf": 4.3671875, "calib/ece": 0.3325196850393701, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6653543307086615, "calib/gap": -0.002844427244581782, "calib/mean_conf": 0.9309448818897638, "calib/mu_c": 0.9298026315789476, "calib/mu_w": 0.9326470588235294, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3325196850393701, "calib/std_conf": 0.04164781229645246, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2456.0, "completions/max_terminated_length": 2456.0, "completions/mean_length": 589.28515625, "completions/mean_terminated_length": 593.9251708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.04906666666666667, "grad_norm": 0.020898550748825073, "learning_rate": 4.277777777777778e-06, "loss": 0.0114, "num_tokens": 11857864.0, "reward": 1.743819236755371, "reward_std": 0.47148585319519043, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6370730400085449, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7991417646408081, "step": 46 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5463698241633579, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.2802400000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.7, "calib/gap": 0.007562393647192245, "calib/mean_conf": 0.93624, "calib/mu_c": 0.9388414634146341, "calib/mu_w": 0.9312790697674419, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2802400000000001, "calib/std_conf": 0.03326653573788528, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2476.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 638.3046875, "completions/mean_terminated_length": 640.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 293.0, "epoch": 0.050133333333333335, "grad_norm": 0.009583506733179092, "learning_rate": 4.25e-06, "loss": 0.0005, "num_tokens": 12127246.0, "reward": 1.8233468532562256, "reward_std": 0.34553125500679016, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6817461252212524, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8147666454315186, "step": 47 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5263819927176432, "calib/avg_num_step_conf": 4.19921875, "calib/ece": 0.30960629921259836, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.6732283464566929, "calib/gap": 0.0027090367428002082, "calib/mean_conf": 0.9355905511811025, "calib/mu_c": 0.9366037735849058, "calib/mu_w": 0.9338947368421056, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30960629921259836, "calib/std_conf": 0.03437068755041604, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2524.0, "completions/max_terminated_length": 2524.0, "completions/mean_length": 560.03515625, "completions/mean_terminated_length": 566.6759033203125, "completions/min_length": 0.0, "completions/min_terminated_length": 270.0, "epoch": 0.0512, "grad_norm": 0.01360836811363697, "learning_rate": 4.222222222222223e-06, "loss": -0.0274, "num_tokens": 12374303.0, "reward": 1.7975071668624878, "reward_std": 0.48006561398506165, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6648679971694946, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8142232894897461, "step": 48 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5263713080168777, "calib/avg_num_step_conf": 4.1015625, "calib/ece": 0.2469169960474309, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6600790513833992, "calib/gap": 0.003284591881274168, "calib/mean_conf": 0.9346640316205533, "calib/mu_c": 0.9356896551724138, "calib/mu_w": 0.9324050632911396, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2469169960474309, "calib/std_conf": 0.0297616647798036, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2460.0, "completions/max_terminated_length": 2460.0, "completions/mean_length": 630.34765625, "completions/mean_terminated_length": 640.3532104492188, "completions/min_length": 0.0, "completions/min_terminated_length": 243.0, "epoch": 0.05226666666666667, "grad_norm": 0.010739161632955074, "learning_rate": 4.194444444444445e-06, "loss": -0.007, "num_tokens": 12640208.0, "reward": 1.897510051727295, "reward_std": 0.49483951926231384, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.716312050819397, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8190404176712036, "step": 49 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.603075091061922, "calib/avg_num_step_conf": 3.83203125, "calib/ece": 0.26188235294117657, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6705882352941176, "calib/gap": 0.013049173437937878, "calib/mean_conf": 0.9363921568627452, "calib/mu_c": 0.9406395348837211, "calib/mu_w": 0.9275903614457832, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.26188235294117657, "calib/std_conf": 0.03151599558479603, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2461.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 616.73046875, "completions/mean_terminated_length": 624.0435180664062, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.05333333333333334, "grad_norm": 0.010514969006180763, "learning_rate": 4.166666666666667e-06, "loss": -0.003, "num_tokens": 12903451.0, "reward": 1.8781836032867432, "reward_std": 0.39321768283843994, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.7060120701789856, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7989096641540527, "step": 50 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6135584376863447, "calib/avg_num_step_conf": 3.72265625, "calib/ece": 0.24864000000000014, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.68, "calib/gap": 0.017291293977340305, "calib/mean_conf": 0.9366400000000001, "calib/mu_c": 0.9420348837209304, "calib/mu_w": 0.92474358974359, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24864000000000014, "calib/std_conf": 0.03652821375320724, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 725.875, "completions/mean_terminated_length": 725.875, "completions/min_length": 272.0, "completions/min_terminated_length": 272.0, "epoch": 0.0544, "grad_norm": 0.009126733988523483, "learning_rate": 4.138888888888889e-06, "loss": 0.0454, "num_tokens": 13198571.0, "reward": 1.874480128288269, "reward_std": 0.47806280851364136, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.712510883808136, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.8010349273681641, "step": 51 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4878570717627152, "calib/avg_num_step_conf": 4.01953125, "calib/ece": 0.15903225806451618, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.7580645161290323, "calib/gap": 0.0030536478550813317, "calib/mean_conf": 0.9469354838709677, "calib/mu_c": 0.9475634517766498, "calib/mu_w": 0.9445098039215685, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.15580645161290327, "calib/std_conf": 0.04660536829377197, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2477.0, "completions/max_terminated_length": 2477.0, "completions/mean_length": 719.6015625, "completions/mean_terminated_length": 733.936279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.055466666666666664, "grad_norm": 0.010852030478417873, "learning_rate": 4.111111111111111e-06, "loss": 0.0161, "num_tokens": 13490741.0, "reward": 2.035630702972412, "reward_std": 0.41557496786117554, "rewards/accuracy_reward_step": 0.76953125, "rewards/final_brier_reward_step": 0.7765073776245117, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.8347657918930054, "step": 52 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.554396650171298, "calib/avg_num_step_conf": 4.16796875, "calib/ece": 0.2316796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.80078125, "calib/gap": 0.004051008755233898, "calib/mean_conf": 0.9543359375, "calib/mu_c": 0.9554594594594594, "calib/mu_w": 0.9514084507042255, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2316796875, "calib/std_conf": 0.03062417788277904, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2055.0, "completions/max_terminated_length": 2055.0, "completions/mean_length": 744.93359375, "completions/mean_terminated_length": 756.7579956054688, "completions/min_length": 0.0, "completions/min_terminated_length": 302.0, "epoch": 0.05653333333333333, "grad_norm": 0.010888488031923771, "learning_rate": 4.083333333333334e-06, "loss": 0.0073, "num_tokens": 13787268.0, "reward": 1.9786882400512695, "reward_std": 0.43185755610466003, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7465863227844238, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8322290182113647, "step": 53 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6321407274895647, "calib/avg_num_step_conf": 4.45703125, "calib/ece": 0.12000000000000009, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.905511811023622, "calib/gap": 0.02065354800238517, "calib/mean_conf": 0.966456692913386, "calib/mu_c": 0.9696279069767443, "calib/mu_w": 0.9489743589743591, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12000000000000009, "calib/std_conf": 0.03484561132242132, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2464.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 736.640625, "completions/mean_terminated_length": 748.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 291.0, "epoch": 0.0576, "grad_norm": 0.010013816878199577, "learning_rate": 4.055555555555556e-06, "loss": -0.0057, "num_tokens": 14082080.0, "reward": 2.181171417236328, "reward_std": 0.31019675731658936, "rewards/accuracy_reward_step": 0.83984375, "rewards/final_brier_reward_step": 0.8491636514663696, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.859897255897522, "step": 54 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6077430972388956, "calib/avg_num_step_conf": 4.53125, "calib/ece": 0.37301204819277134, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9116465863453815, "calib/gap": 0.03377951180472172, "calib/mean_conf": 0.9633734939759038, "calib/mu_c": 0.9772108843537415, "calib/mu_w": 0.9434313725490198, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37301204819277134, "calib/std_conf": 0.08802076847524597, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2492.0, "completions/max_terminated_length": 2492.0, "completions/mean_length": 802.35546875, "completions/mean_terminated_length": 811.8695678710938, "completions/min_length": 0.0, "completions/min_terminated_length": 324.0, "epoch": 0.058666666666666666, "grad_norm": 0.009584730491042137, "learning_rate": 4.027777777777779e-06, "loss": 0.0133, "num_tokens": 14395307.0, "reward": 1.6883587837219238, "reward_std": 0.4356482923030853, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6104562282562256, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7523536682128906, "step": 55 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6073613512435165, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.39220883534136564, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.9678714859437751, "calib/gap": 0.006641175688256062, "calib/mean_conf": 0.97855421686747, "calib/mu_c": 0.9813013698630135, "calib/mu_w": 0.9746601941747575, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.39220883534136564, "calib/std_conf": 0.018310082160494335, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2493.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 820.5625, "completions/mean_terminated_length": 833.5873413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 439.0, "epoch": 0.05973333333333333, "grad_norm": 0.010322212241590023, "learning_rate": 4.000000000000001e-06, "loss": -0.0006, "num_tokens": 14712211.0, "reward": 1.6709468364715576, "reward_std": 0.3402813971042633, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5817476511001587, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7582898139953613, "step": 56 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5652153902798233, "calib/avg_num_step_conf": 4.61328125, "calib/ece": 0.20820000000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.988, "calib/gap": 0.006562960235640736, "calib/mean_conf": 0.9842000000000001, "calib/mu_c": 0.9856701030927837, "calib/mu_w": 0.979107142857143, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.20820000000000002, "calib/std_conf": 0.016624078921853087, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2762.0, "completions/max_terminated_length": 2762.0, "completions/mean_length": 848.28515625, "completions/mean_terminated_length": 854.9645385742188, "completions/min_length": 0.0, "completions/min_terminated_length": 343.0, "epoch": 0.0608, "grad_norm": 0.01343838032335043, "learning_rate": 3.972222222222223e-06, "loss": -0.0141, "num_tokens": 15036164.0, "reward": 2.0146446228027344, "reward_std": 0.42082732915878296, "rewards/accuracy_reward_step": 0.7578125, "rewards/final_brier_reward_step": 0.7586382627487183, "rewards/format_reward_step": 0.96875, "rewards/stepwise_brier_reward": 0.8155644536018372, "step": 57 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4425564116675839, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.35218000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.984, "calib/gap": -0.003189323059988869, "calib/mean_conf": 0.98418, "calib/mu_c": 0.983006329113924, "calib/mu_w": 0.9861956521739129, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.35218000000000005, "calib/std_conf": 0.014471613593514721, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 875.03125, "completions/mean_terminated_length": 885.4071655273438, "completions/min_length": 0.0, "completions/min_terminated_length": 379.0, "epoch": 0.06186666666666667, "grad_norm": 0.00953914038836956, "learning_rate": 3.944444444444445e-06, "loss": -0.0265, "num_tokens": 15366492.0, "reward": 1.751786708831787, "reward_std": 0.6095901131629944, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.622763991355896, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7359455823898315, "step": 58 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5800150519978106, "calib/avg_num_step_conf": 4.609375, "calib/ece": 0.3273725490196079, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.0024261083743843948, "calib/mean_conf": 0.9861960784313726, "calib/mu_c": 0.9870238095238096, "calib/mu_w": 0.9845977011494252, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3273725490196079, "calib/std_conf": 0.00929055942477207, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1890.0, "completions/max_terminated_length": 1890.0, "completions/mean_length": 769.13671875, "completions/mean_terminated_length": 781.3452758789062, "completions/min_length": 0.0, "completions/min_terminated_length": 339.0, "epoch": 0.06293333333333333, "grad_norm": 0.009146219119429588, "learning_rate": 3.916666666666667e-06, "loss": -0.0049, "num_tokens": 15669639.0, "reward": 1.8364317417144775, "reward_std": 0.4589112102985382, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6660621166229248, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7577903270721436, "step": 59 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5622893258426966, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.34421686746987956, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0032183988764044047, "calib/mean_conf": 0.9867871485943776, "calib/mu_c": 0.9879374999999999, "calib/mu_w": 0.9847191011235955, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.34421686746987956, "calib/std_conf": 0.007973278383118638, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2733.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 799.46875, "completions/mean_terminated_length": 815.3944702148438, "completions/min_length": 0.0, "completions/min_terminated_length": 388.0, "epoch": 0.064, "grad_norm": 0.009623522870242596, "learning_rate": 3.88888888888889e-06, "loss": -0.0195, "num_tokens": 15983159.0, "reward": 1.7713819742202759, "reward_std": 0.5381677150726318, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6353933811187744, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.754822313785553, "step": 60 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5385592241976449, "calib/avg_num_step_conf": 4.89453125, "calib/ece": 0.26834645669291335, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005110444085276722, "calib/mean_conf": 0.9888188976377953, "calib/mu_c": 0.9889617486338795, "calib/mu_w": 0.9884507042253519, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26834645669291335, "calib/std_conf": 0.005115080872098183, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 753.19140625, "completions/mean_terminated_length": 759.1220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 361.0, "epoch": 0.06506666666666666, "grad_norm": 0.009747637435793877, "learning_rate": 3.861111111111112e-06, "loss": 0.0259, "num_tokens": 16280040.0, "reward": 1.9452223777770996, "reward_std": 0.3222607970237732, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7210999727249146, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7863516211509705, "step": 61 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5532234432234432, "calib/avg_num_step_conf": 4.796875, "calib/ece": 0.296403162055336, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0020761904761904537, "calib/mean_conf": 0.988102766798419, "calib/mu_c": 0.988742857142857, "calib/mu_w": 0.9866666666666666, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.296403162055336, "calib/std_conf": 0.0063772471241418886, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1681.0, "completions/max_terminated_length": 1681.0, "completions/mean_length": 783.80078125, "completions/mean_terminated_length": 796.2421264648438, "completions/min_length": 0.0, "completions/min_terminated_length": 401.0, "epoch": 0.06613333333333334, "grad_norm": 0.01057348120957613, "learning_rate": 3.833333333333334e-06, "loss": -0.023, "num_tokens": 16587773.0, "reward": 1.890638828277588, "reward_std": 0.4780218303203583, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.691383957862854, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8008589744567871, "step": 62 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5422031955774287, "calib/avg_num_step_conf": 4.87109375, "calib/ece": 0.3461811023622048, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0008582215330680265, "calib/mean_conf": 0.9879133858267717, "calib/mu_c": 0.9882208588957054, "calib/mu_w": 0.9873626373626374, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3461811023622048, "calib/std_conf": 0.005884081852323143, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2563.0, "completions/max_terminated_length": 2563.0, "completions/mean_length": 811.48046875, "completions/mean_terminated_length": 817.8700561523438, "completions/min_length": 0.0, "completions/min_terminated_length": 330.0, "epoch": 0.0672, "grad_norm": 0.01171042863279581, "learning_rate": 3.8055555555555556e-06, "loss": -0.0046, "num_tokens": 16904152.0, "reward": 1.7990059852600098, "reward_std": 0.3330551087856293, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.641627311706543, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7575217485427856, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5147540983606558, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.2632411067193676, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9881422924901185, "calib/gap": -0.0015940671350509472, "calib/mean_conf": 0.9865612648221345, "calib/mu_c": 0.9861202185792348, "calib/mu_w": 0.9877142857142858, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2632411067193676, "calib/std_conf": 0.011712683641421357, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 777.28125, "completions/mean_terminated_length": 789.6190795898438, "completions/min_length": 0.0, "completions/min_terminated_length": 368.0, "epoch": 0.06826666666666667, "grad_norm": 0.009954201057553291, "learning_rate": 3.777777777777778e-06, "loss": 0.0079, "num_tokens": 17206912.0, "reward": 1.949721336364746, "reward_std": 0.5117054581642151, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.72124844789505, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.8120118379592896, "step": 64 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5699662030417262, "calib/avg_num_step_conf": 4.90625, "calib/ece": 0.37192156862745107, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.001418172364486936, "calib/mean_conf": 0.987607843137255, "calib/mu_c": 0.9881528662420381, "calib/mu_w": 0.9867346938775512, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37192156862745107, "calib/std_conf": 0.006084626823662836, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2177.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 724.48046875, "completions/mean_terminated_length": 735.980224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 342.0, "epoch": 0.06933333333333333, "grad_norm": 0.015558356419205666, "learning_rate": 3.7500000000000005e-06, "loss": -0.0087, "num_tokens": 17497403.0, "reward": 1.7543145418167114, "reward_std": 0.38484832644462585, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6193417906761169, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7338536977767944, "step": 65 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4737023139462164, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.473197628458498, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.001172607879925125, "calib/mean_conf": 0.9870316205533597, "calib/mu_c": 0.9864615384615385, "calib/mu_w": 0.9876341463414636, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.473197628458498, "calib/std_conf": 0.008257646363266199, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2491.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 826.0, "completions/mean_terminated_length": 835.7944946289062, "completions/min_length": 0.0, "completions/min_terminated_length": 355.0, "epoch": 0.0704, "grad_norm": 0.009562370367348194, "learning_rate": 3.7222222222222225e-06, "loss": -0.0245, "num_tokens": 17815211.0, "reward": 1.5644525289535522, "reward_std": 0.4363713562488556, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5194617509841919, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7149111032485962, "step": 66 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6139020225899658, "calib/avg_num_step_conf": 4.94140625, "calib/ece": 0.35351562500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.003777252429734701, "calib/mean_conf": 0.9851562500000002, "calib/mu_c": 0.9865432098765432, "calib/mu_w": 0.9827659574468085, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35292968750000003, "calib/std_conf": 0.012898908323478394, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1802.0, "completions/max_terminated_length": 1802.0, "completions/mean_length": 775.19140625, "completions/mean_terminated_length": 787.49609375, "completions/min_length": 0.0, "completions/min_terminated_length": 432.0, "epoch": 0.07146666666666666, "grad_norm": 0.011739449575543404, "learning_rate": 3.694444444444445e-06, "loss": 0.0093, "num_tokens": 18118668.0, "reward": 1.8034915924072266, "reward_std": 0.41813188791275024, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6447757482528687, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7801278829574585, "step": 67 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5155316606929511, "calib/avg_num_step_conf": 4.88671875, "calib/ece": 0.3502745098039217, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.0011489446435684059, "calib/mean_conf": 0.9855686274509805, "calib/mu_c": 0.9859876543209878, "calib/mu_w": 0.9848387096774194, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3502745098039217, "calib/std_conf": 0.01142624241661134, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2532.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 761.01953125, "completions/mean_terminated_length": 773.0992431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 328.0, "epoch": 0.07253333333333334, "grad_norm": 0.011653666384518147, "learning_rate": 3.6666666666666666e-06, "loss": -0.034, "num_tokens": 18417577.0, "reward": 1.7976186275482178, "reward_std": 0.37335824966430664, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6434906721115112, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7579212188720703, "step": 68 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6188670685073563, "calib/avg_num_step_conf": 4.90234375, "calib/ece": 0.42772, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.98, "calib/gap": 0.004584224512281909, "calib/mean_conf": 0.98372, "calib/mu_c": 0.9857553956834533, "calib/mu_w": 0.9811711711711714, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.42772, "calib/std_conf": 0.01513147712551554, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2520.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 743.21484375, "completions/mean_terminated_length": 758.0199584960938, "completions/min_length": 0.0, "completions/min_terminated_length": 383.0, "epoch": 0.0736, "grad_norm": 0.013122929260134697, "learning_rate": 3.638888888888889e-06, "loss": 0.0125, "num_tokens": 18712336.0, "reward": 1.6212658882141113, "reward_std": 0.3981201648712158, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5588144659996033, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7153115272521973, "step": 69 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6050340136054422, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.3887854251012147, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.004750340136054465, "calib/mean_conf": 0.983927125506073, "calib/mu_c": 0.9858503401360545, "calib/mu_w": 0.9811000000000001, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3887854251012147, "calib/std_conf": 0.012121297925437025, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2420.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 834.828125, "completions/mean_terminated_length": 854.864013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 366.0, "epoch": 0.07466666666666667, "grad_norm": 0.012035924009978771, "learning_rate": 3.6111111111111115e-06, "loss": -0.0489, "num_tokens": 19033044.0, "reward": 1.669405221939087, "reward_std": 0.2907199263572693, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.584688663482666, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.7257446050643921, "step": 70 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5980133727658481, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.37721568627450996, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9803921568627451, "calib/gap": 0.0064735759290215356, "calib/mean_conf": 0.9811372549019609, "calib/mu_c": 0.9837012987012987, "calib/mu_w": 0.9772277227722772, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37721568627450996, "calib/std_conf": 0.018124860629990892, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1754.0, "completions/max_terminated_length": 1754.0, "completions/mean_length": 778.9921875, "completions/mean_terminated_length": 791.357177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 370.0, "epoch": 0.07573333333333333, "grad_norm": 0.012741051614284515, "learning_rate": 3.5833333333333335e-06, "loss": 0.0015, "num_tokens": 19336874.0, "reward": 1.7401325702667236, "reward_std": 0.6308400630950928, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6186171770095825, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7559757232666016, "step": 71 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6513831121674258, "calib/avg_num_step_conf": 4.94140625, "calib/ece": 0.3752777777777778, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9841269841269841, "calib/gap": 0.010308972073678091, "calib/mean_conf": 0.9824206349206349, "calib/mu_c": 0.9864705882352942, "calib/mu_w": 0.9761616161616161, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3752777777777778, "calib/std_conf": 0.015612293271143017, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2219.0, "completions/max_terminated_length": 2219.0, "completions/mean_length": 708.7109375, "completions/mean_terminated_length": 722.8287353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 390.0, "epoch": 0.0768, "grad_norm": 0.011979524977505207, "learning_rate": 3.555555555555556e-06, "loss": -0.0052, "num_tokens": 19622712.0, "reward": 1.7353651523590088, "reward_std": 0.37993955612182617, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.615549623966217, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7712236642837524, "step": 72 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.569731738849386, "calib/avg_num_step_conf": 4.9375, "calib/ece": 0.2537600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.984, "calib/gap": 0.005447640594699266, "calib/mean_conf": 0.9817600000000001, "calib/mu_c": 0.9832417582417582, "calib/mu_w": 0.9777941176470589, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2537600000000001, "calib/std_conf": 0.016926381775205245, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2510.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 709.1484375, "completions/mean_terminated_length": 723.27490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 363.0, "epoch": 0.07786666666666667, "grad_norm": 0.015338779427111149, "learning_rate": 3.5277777777777784e-06, "loss": 0.0176, "num_tokens": 19911286.0, "reward": 1.9338476657867432, "reward_std": 0.49027276039123535, "rewards/accuracy_reward_step": 0.7109375, "rewards/final_brier_reward_step": 0.7219749689102173, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.8024784326553345, "step": 73 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.543591922096595, "calib/avg_num_step_conf": 4.94140625, "calib/ece": 0.4088400000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0035925756486504534, "calib/mean_conf": 0.9808400000000002, "calib/mu_c": 0.9823776223776225, "calib/mu_w": 0.978785046728972, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4088400000000002, "calib/std_conf": 0.015533653787824694, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2793.0, "completions/max_terminated_length": 2793.0, "completions/mean_length": 714.86328125, "completions/mean_terminated_length": 729.1036376953125, "completions/min_length": 0.0, "completions/min_terminated_length": 376.0, "epoch": 0.07893333333333333, "grad_norm": 0.011618921533226967, "learning_rate": 3.5e-06, "loss": -0.019, "num_tokens": 20198219.0, "reward": 1.651144027709961, "reward_std": 0.4587170481681824, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.57573401927948, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7241549491882324, "step": 74 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6485243055555555, "calib/avg_num_step_conf": 5.046875, "calib/ece": 0.21861111111111128, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9642857142857143, "calib/gap": 0.009864583333333177, "calib/mean_conf": 0.9805158730158731, "calib/mu_c": 0.9828645833333334, "calib/mu_w": 0.9730000000000002, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21861111111111128, "calib/std_conf": 0.030422483886513023, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2484.0, "completions/max_terminated_length": 2484.0, "completions/mean_length": 698.39453125, "completions/mean_terminated_length": 709.480224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 402.0, "epoch": 0.08, "grad_norm": 0.011897450312972069, "learning_rate": 3.4722222222222224e-06, "loss": -0.017, "num_tokens": 20481760.0, "reward": 2.0114951133728027, "reward_std": 0.47728487849235535, "rewards/accuracy_reward_step": 0.75, "rewards/final_brier_reward_step": 0.7613714933395386, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.8158584833145142, "step": 75 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5058423913043478, "calib/avg_num_step_conf": 4.96875, "calib/ece": 0.35123015873015884, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.996031746031746, "calib/gap": -0.000584239130434816, "calib/mean_conf": 0.9861507936507937, "calib/mu_c": 0.9859375, "calib/mu_w": 0.9865217391304348, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35123015873015884, "calib/std_conf": 0.0132985065758489, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2503.0, "completions/max_terminated_length": 2503.0, "completions/mean_length": 691.328125, "completions/mean_terminated_length": 702.3016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 343.0, "epoch": 0.08106666666666666, "grad_norm": 0.013411462306976318, "learning_rate": 3.444444444444445e-06, "loss": -0.0219, "num_tokens": 20761796.0, "reward": 1.770603060722351, "reward_std": 0.3884144425392151, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6304218769073486, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7410531044006348, "step": 76 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5758309248554914, "calib/avg_num_step_conf": 4.9609375, "calib/ece": 0.30312252964426867, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9960474308300395, "calib/gap": 0.004814306358381537, "calib/mean_conf": 0.9869169960474308, "calib/mu_c": 0.9884393063583815, "calib/mu_w": 0.983625, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30312252964426867, "calib/std_conf": 0.012852962457764204, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1565.0, "completions/max_terminated_length": 1565.0, "completions/mean_length": 657.34375, "completions/mean_terminated_length": 670.4382934570312, "completions/min_length": 0.0, "completions/min_terminated_length": 332.0, "epoch": 0.08213333333333334, "grad_norm": 0.011887096799910069, "learning_rate": 3.416666666666667e-06, "loss": -0.0116, "num_tokens": 21034740.0, "reward": 1.8679624795913696, "reward_std": 0.5159233808517456, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6855285167694092, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7706961631774902, "step": 77 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5792716891797799, "calib/avg_num_step_conf": 4.859375, "calib/ece": 0.3280592885375495, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9881422924901185, "calib/gap": 0.008647611753237694, "calib/mean_conf": 0.9881383399209487, "calib/mu_c": 0.9910778443113772, "calib/mu_w": 0.9824302325581395, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3280592885375495, "calib/std_conf": 0.021115544926909733, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1318.0, "completions/max_terminated_length": 1318.0, "completions/mean_length": 689.78125, "completions/mean_terminated_length": 700.730224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 358.0, "epoch": 0.0832, "grad_norm": 0.013817236758768559, "learning_rate": 3.3888888888888893e-06, "loss": -0.0103, "num_tokens": 21319348.0, "reward": 1.8158355951309204, "reward_std": 0.4627304673194885, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.6557570099830627, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7325854301452637, "step": 78 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49284195605953224, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.3271314741035858, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9920318725099602, "calib/gap": -0.0022891566265060836, "calib/mean_conf": 0.9884860557768925, "calib/mu_c": 0.987710843373494, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3271314741035858, "calib/std_conf": 0.015072824705491235, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 671.00390625, "completions/mean_terminated_length": 681.65478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 316.0, "epoch": 0.08426666666666667, "grad_norm": 0.04336387291550636, "learning_rate": 3.3611111111111117e-06, "loss": -0.0013, "num_tokens": 21597501.0, "reward": 1.810776948928833, "reward_std": 0.4501277208328247, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6545706987380981, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7447869777679443, "step": 79 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5829744473155326, "calib/avg_num_step_conf": 4.16796875, "calib/ece": 0.31088932806324104, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9960474308300395, "calib/gap": 0.0086118288831466, "calib/mean_conf": 0.9907312252964426, "calib/mu_c": 0.9934883720930232, "calib/mu_w": 0.9848765432098766, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31088932806324104, "calib/std_conf": 0.03193927806759079, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 560.9375, "completions/mean_terminated_length": 569.84130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.08533333333333333, "grad_norm": 0.01943901553750038, "learning_rate": 3.3333333333333333e-06, "loss": -0.0237, "num_tokens": 21843261.0, "reward": 1.8514273166656494, "reward_std": 0.4234926700592041, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.6763685941696167, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7371532320976257, "step": 80 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.536545240893067, "calib/avg_num_step_conf": 3.375, "calib/ece": 0.26109803149606314, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": 0.002820955738347286, "calib/mean_conf": 0.9894444881889765, "calib/mu_c": 0.990210810810811, "calib/mu_w": 0.9873898550724637, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26109803149606314, "calib/std_conf": 0.013146753032183678, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1464.0, "completions/max_terminated_length": 1464.0, "completions/mean_length": 547.81640625, "completions/mean_terminated_length": 556.511962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.0864, "grad_norm": 0.051593534648418427, "learning_rate": 3.3055555555555558e-06, "loss": -0.018, "num_tokens": 22089750.0, "reward": 1.9547233581542969, "reward_std": 0.4877452552318573, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.729171872138977, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7694095373153687, "step": 81 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5687758945386063, "calib/avg_num_step_conf": 3.703125, "calib/ece": 0.28967063492063494, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0036992090395480126, "calib/mean_conf": 0.9920515873015873, "calib/mu_c": 0.9931525423728814, "calib/mu_w": 0.9894533333333334, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28967063492063494, "calib/std_conf": 0.009179067232472568, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1269.0, "completions/max_terminated_length": 1269.0, "completions/mean_length": 525.2109375, "completions/mean_terminated_length": 535.67333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.08746666666666666, "grad_norm": 0.014503278769552708, "learning_rate": 3.277777777777778e-06, "loss": -0.0238, "num_tokens": 22329756.0, "reward": 1.881931185722351, "reward_std": 0.35736411809921265, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.6894752979278564, "rewards/format_reward_step": 0.97265625, "rewards/stepwise_brier_reward": 0.7444990873336792, "step": 82 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5276703167005443, "calib/avg_num_step_conf": 4.2734375, "calib/ece": 0.398373015873016, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0012477870303586602, "calib/mean_conf": 0.9975793650793652, "calib/mu_c": 0.9980794701986754, "calib/mu_w": 0.9968316831683167, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.398373015873016, "calib/std_conf": 0.005099180099988029, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 606.671875, "completions/mean_terminated_length": 621.2320556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 294.0, "epoch": 0.08853333333333334, "grad_norm": 0.014670017175376415, "learning_rate": 3.2500000000000002e-06, "loss": -0.0316, "num_tokens": 22592328.0, "reward": 1.6692111492156982, "reward_std": 0.40030843019485474, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5803611874580383, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.6433586478233337, "step": 83 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5767650745021585, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.3348913043478261, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9723320158102767, "calib/gap": 0.012164740286868314, "calib/mean_conf": 0.9898320158102766, "calib/mu_c": 0.9939670658682636, "calib/mu_w": 0.9818023255813952, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3323221343873518, "calib/std_conf": 0.06313149307339118, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 588.890625, "completions/mean_terminated_length": 595.87353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.0896, "grad_norm": 0.014643252827227116, "learning_rate": 3.2222222222222227e-06, "loss": -0.014, "num_tokens": 22849004.0, "reward": 1.7984957695007324, "reward_std": 0.5485701560974121, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.6409984230995178, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.709234893321991, "step": 84 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5872381847475832, "calib/avg_num_step_conf": 4.79296875, "calib/ece": 0.3902511999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0023364930182599997, "calib/mean_conf": 0.9982511999999999, "calib/mu_c": 0.9991671052631579, "calib/mu_w": 0.9968306122448979, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3902511999999999, "calib/std_conf": 0.004816656367232362, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2649.0, "completions/max_terminated_length": 2649.0, "completions/mean_length": 625.96875, "completions/mean_terminated_length": 635.90478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.09066666666666667, "grad_norm": 0.01396125741302967, "learning_rate": 3.1944444444444443e-06, "loss": -0.0154, "num_tokens": 23117076.0, "reward": 1.6774587631225586, "reward_std": 0.4869632124900818, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5878802537918091, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.6453918814659119, "step": 85 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5401567398119123, "calib/avg_num_step_conf": 4.89453125, "calib/ece": 0.430001725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0001905611285267783, "calib/mean_conf": 0.9986291764705881, "calib/mu_c": 0.9987113793103449, "calib/mu_w": 0.9985208181818181, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.430001725490196, "calib/std_conf": 0.003159838372050952, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 589.5390625, "completions/mean_terminated_length": 598.8968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.09173333333333333, "grad_norm": 0.0163346566259861, "learning_rate": 3.1666666666666667e-06, "loss": 0.0001, "num_tokens": 23373510.0, "reward": 1.6383399963378906, "reward_std": 0.5051381587982178, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5676577687263489, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6028897762298584, "step": 86 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5182412060301508, "calib/avg_num_step_conf": 4.890625, "calib/ece": 0.20084948192771096, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.9959839357429718, "calib/gap": -0.0019423119597991034, "calib/mean_conf": 0.9976366305220884, "calib/mu_c": 0.9972466080402009, "calib/mu_w": 0.99918892, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.19964466265060252, "calib/std_conf": 0.019559850423825895, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 588.63671875, "completions/mean_terminated_length": 605.1846923828125, "completions/min_length": 0.0, "completions/min_terminated_length": 335.0, "epoch": 0.0928, "grad_norm": 0.015122613869607449, "learning_rate": 3.138888888888889e-06, "loss": -0.0381, "num_tokens": 23629697.0, "reward": 2.0388693809509277, "reward_std": 0.32220447063446045, "rewards/accuracy_reward_step": 0.77734375, "rewards/final_brier_reward_step": 0.7655643224716187, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.8039764761924744, "step": 87 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5836539941511467, "calib/avg_num_step_conf": 4.8828125, "calib/ece": 0.2867130517928287, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9960159362549801, "calib/gap": 0.013177578882561214, "calib/mean_conf": 0.9958763984063747, "calib/mu_c": 0.9997089213483147, "calib/mu_w": 0.9865313424657535, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2867130517928287, "calib/std_conf": 0.03848135956455916, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 608.25390625, "completions/mean_terminated_length": 622.85205078125, "completions/min_length": 0.0, "completions/min_terminated_length": 315.0, "epoch": 0.09386666666666667, "grad_norm": 0.01662285625934601, "learning_rate": 3.1111111111111116e-06, "loss": -0.0363, "num_tokens": 23895258.0, "reward": 1.8751245737075806, "reward_std": 0.4252081513404846, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.6741741299629211, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.7560117244720459, "step": 88 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.45828840970350404, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.43035649951178867, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0004923530909297646, "calib/mean_conf": 0.9994621905686992, "calib/mu_c": 0.9996743427135714, "calib/mu_w": 0.9991819896226416, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43035649951178867, "calib/std_conf": 0.0021776488690664034, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2567.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 666.6171875, "completions/mean_terminated_length": 688.1209716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 350.0, "epoch": 0.09493333333333333, "grad_norm": 0.016230568289756775, "learning_rate": 3.0833333333333336e-06, "loss": -0.0306, "num_tokens": 24174800.0, "reward": 1.5884817838668823, "reward_std": 0.45879366993904114, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5436413288116455, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.6227859258651733, "step": 89 }, { "calib/answer_extract_rate": 0.7890625, "calib/auroc": 0.5662393162393162, "calib/avg_num_step_conf": 6.234375, "calib/ece": 0.35607282648366334, "calib/final_conf_rate": 0.7890625, "calib/format_rate": 0.77734375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0004208154250425755, "calib/mean_conf": 0.9996371829193069, "calib/mu_c": 0.9997871765361538, "calib/mu_w": 0.9993663611111112, "calib/nonempty_final_conf_rate": 0.7890625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.35607282648366334, "calib/std_conf": 0.0016880916927079351, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.15234375, "completions/max_length": 3003.0, "completions/max_terminated_length": 3003.0, "completions/mean_length": 707.5546875, "completions/mean_terminated_length": 834.7188720703125, "completions/min_length": 0.0, "completions/min_terminated_length": 360.0, "epoch": 0.096, "grad_norm": 0.024469584226608276, "learning_rate": 3.055555555555556e-06, "loss": -0.1823, "num_tokens": 24459254.0, "reward": 1.4220082759857178, "reward_std": 0.8066992163658142, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.508165717124939, "rewards/format_reward_step": 0.77734375, "rewards/stepwise_brier_reward": 0.5783044099807739, "step": 90 }, { "calib/answer_extract_rate": 0.859375, "calib/auroc": 0.4834140218755603, "calib/avg_num_step_conf": 4.671875, "calib/ece": 0.352261226244344, "calib/final_conf_rate": 0.86328125, "calib/format_rate": 0.84375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0001986480186483286, "calib/mean_conf": 0.9993200497737558, "calib/mu_c": 0.999390160839161, "calib/mu_w": 0.9991915128205127, "calib/nonempty_final_conf_rate": 0.86328125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.352261226244344, "calib/std_conf": 0.0024244563587599646, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 723.5625, "completions/mean_terminated_length": 798.413818359375, "completions/min_length": 0.0, "completions/min_terminated_length": 392.0, "epoch": 0.09706666666666666, "grad_norm": 0.011809024028480053, "learning_rate": 3.0277777777777776e-06, "loss": -0.0879, "num_tokens": 24752198.0, "reward": 1.5397309064865112, "reward_std": 0.6513643264770508, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.543455958366394, "rewards/format_reward_step": 0.84375, "rewards/stepwise_brier_reward": 0.5764051675796509, "step": 91 }, { "calib/answer_extract_rate": 0.8359375, "calib/auroc": 0.5808421729347476, "calib/avg_num_step_conf": 4.71484375, "calib/ece": 0.2820045789252337, "calib/final_conf_rate": 0.8359375, "calib/format_rate": 0.8203125, "calib/frac_conf_gt_0.9": 0.9953271028037384, "calib/gap": 0.00020097260152163177, "calib/mean_conf": 0.9969578499532711, "calib/mu_c": 0.9970151365359478, "calib/mu_w": 0.9968141639344261, "calib/nonempty_final_conf_rate": 0.8359375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2820045789252337, "calib/std_conf": 0.00902268426083969, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.13671875, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 607.453125, "completions/mean_terminated_length": 703.6561279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 389.0, "epoch": 0.09813333333333334, "grad_norm": 0.019445186480879784, "learning_rate": 3e-06, "loss": -0.1993, "num_tokens": 25014426.0, "reward": 1.6243741512298584, "reward_std": 0.8272405862808228, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.5951131582260132, "rewards/format_reward_step": 0.8203125, "rewards/stepwise_brier_reward": 0.6758211851119995, "step": 92 }, { "calib/answer_extract_rate": 0.8203125, "calib/auroc": 0.5516564952048822, "calib/avg_num_step_conf": 4.57421875, "calib/ece": 0.28653983276190476, "calib/final_conf_rate": 0.8203125, "calib/format_rate": 0.80859375, "calib/frac_conf_gt_0.9": 0.9952380952380953, "calib/gap": 0.016762255074106225, "calib/mean_conf": 0.9913017375238096, "calib/mu_c": 0.9962505937837839, "calib/mu_w": 0.9794883387096777, "calib/nonempty_final_conf_rate": 0.8203125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.28653983276190476, "calib/std_conf": 0.06881002485183123, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 717.1015625, "completions/mean_terminated_length": 812.2920532226562, "completions/min_length": 0.0, "completions/min_terminated_length": 385.0, "epoch": 0.0992, "grad_norm": 0.015961183235049248, "learning_rate": 2.9722222222222225e-06, "loss": -0.1268, "num_tokens": 25303780.0, "reward": 1.5653667449951172, "reward_std": 0.7328537106513977, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5762243866920471, "rewards/format_reward_step": 0.80859375, "rewards/stepwise_brier_reward": 0.5993050336837769, "step": 93 }, { "calib/answer_extract_rate": 0.87109375, "calib/auroc": 0.40063063063063065, "calib/avg_num_step_conf": 4.65234375, "calib/ece": 0.3247029910267859, "calib/final_conf_rate": 0.875, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0019463427693693358, "calib/mean_conf": 0.994345848169643, "calib/mu_c": 0.9937028599333331, "calib/mu_w": 0.9956492027027024, "calib/nonempty_final_conf_rate": 0.875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3247029910267859, "calib/std_conf": 0.004928637147614021, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 610.27734375, "completions/mean_terminated_length": 679.2651977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 346.0, "epoch": 0.10026666666666667, "grad_norm": 0.01557290181517601, "learning_rate": 2.944444444444445e-06, "loss": -0.1429, "num_tokens": 25568691.0, "reward": 1.6035652160644531, "reward_std": 0.6202777624130249, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5805128812789917, "rewards/format_reward_step": 0.85546875, "rewards/stepwise_brier_reward": 0.6071857213973999, "step": 94 }, { "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.49102341358484325, "calib/avg_num_step_conf": 4.7578125, "calib/ece": 0.3309030172409484, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00017896086578106374, "calib/mean_conf": 0.990385775861638, "calib/mu_c": 0.9903248366013072, "calib/mu_w": 0.9905037974670883, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3309030172409484, "calib/std_conf": 0.0019203123156793884, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2909.0, "completions/max_terminated_length": 2909.0, "completions/mean_length": 620.13671875, "completions/mean_terminated_length": 672.690673828125, "completions/min_length": 0.0, "completions/min_terminated_length": 330.0, "epoch": 0.10133333333333333, "grad_norm": 0.0158828254789114, "learning_rate": 2.916666666666667e-06, "loss": -0.1137, "num_tokens": 25833574.0, "reward": 1.6486221551895142, "reward_std": 0.6622723340988159, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.599368691444397, "rewards/format_reward_step": 0.89453125, "rewards/stepwise_brier_reward": 0.620120108127594, "step": 95 }, { "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.21689075630252097, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.21689075630252097, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2941.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 605.23828125, "completions/mean_terminated_length": 637.6172485351562, "completions/min_length": 0.0, "completions/min_terminated_length": 262.0, "epoch": 0.1024, "grad_norm": 0.014743155799806118, "learning_rate": 2.888888888888889e-06, "loss": -0.0544, "num_tokens": 26094331.0, "reward": 1.9054555892944336, "reward_std": 0.5066296458244324, "rewards/accuracy_reward_step": 0.71875, "rewards/final_brier_reward_step": 0.7188921570777893, "rewards/format_reward_step": 0.921875, "rewards/stepwise_brier_reward": 0.7466801404953003, "step": 96 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4942528735632184, "calib/avg_num_step_conf": 4.8828125, "calib/ece": 0.33930923694779114, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0002528735632186807, "calib/mean_conf": 0.9899116465863453, "calib/mu_c": 0.99, "calib/mu_w": 0.9897471264367813, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33930923694779114, "calib/std_conf": 0.0026586648707911872, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2682.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 574.765625, "completions/mean_terminated_length": 588.5599975585938, "completions/min_length": 0.0, "completions/min_terminated_length": 275.0, "epoch": 0.10346666666666667, "grad_norm": 0.016557862982153893, "learning_rate": 2.861111111111111e-06, "loss": -0.0397, "num_tokens": 26346543.0, "reward": 1.7600922584533691, "reward_std": 0.4420820474624634, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.63569176197052, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.6781147718429565, "step": 97 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49673202614379086, "calib/avg_num_step_conf": 4.890625, "calib/ece": 0.3802788844621514, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0002614379084966423, "calib/mean_conf": 0.9898406374501992, "calib/mu_c": 0.9897385620915033, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3802788844621514, "calib/std_conf": 0.002519743155512655, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2586.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 603.6953125, "completions/mean_terminated_length": 613.27783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 309.0, "epoch": 0.10453333333333334, "grad_norm": 0.027152279391884804, "learning_rate": 2.8333333333333335e-06, "loss": -0.0164, "num_tokens": 26607273.0, "reward": 1.6960666179656982, "reward_std": 0.5141116380691528, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6012991666793823, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6439046859741211, "step": 98 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5081300813008129, "calib/avg_num_step_conf": 4.73828125, "calib/ece": 0.4836546184738957, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006504065040648932, "calib/mean_conf": 0.9896787148594378, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.989349593495935, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4836546184738957, "calib/std_conf": 0.003570459561589219, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 604.25390625, "completions/mean_terminated_length": 616.2908325195312, "completions/min_length": 0.0, "completions/min_terminated_length": 317.0, "epoch": 0.1056, "grad_norm": 0.013146537356078625, "learning_rate": 2.805555555555556e-06, "loss": 0.0024, "num_tokens": 26867762.0, "reward": 1.4687217473983765, "reward_std": 0.42152565717697144, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.4981667995452881, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.5095323920249939, "step": 99 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5055555555555555, "calib/avg_num_step_conf": 4.80859375, "calib/ece": 0.34397637795275593, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": 0.0010000000000000009, "calib/mean_conf": 0.9896456692913386, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9889999999999999, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.34397637795275593, "calib/std_conf": 0.005635974940365421, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1190.0, "completions/max_terminated_length": 1190.0, "completions/mean_length": 579.734375, "completions/mean_terminated_length": 588.9365234375, "completions/min_length": 0.0, "completions/min_terminated_length": 359.0, "epoch": 0.10666666666666667, "grad_norm": 0.01472147461026907, "learning_rate": 2.7777777777777783e-06, "loss": -0.0164, "num_tokens": 27123582.0, "reward": 1.7837918996810913, "reward_std": 0.3634037673473358, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6404097080230713, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6822579503059387, "step": 100 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5047619047619047, "calib/avg_num_step_conf": 4.77734375, "calib/ece": 0.3978823529411765, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.00942857142857123, "calib/mean_conf": 0.9861176470588234, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9805714285714285, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3978823529411765, "calib/std_conf": 0.061874524219624055, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 950.0, "completions/max_terminated_length": 950.0, "completions/mean_length": 560.92578125, "completions/mean_terminated_length": 569.8294067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 286.0, "epoch": 0.10773333333333333, "grad_norm": 0.018577059730887413, "learning_rate": 2.7500000000000004e-06, "loss": -0.0101, "num_tokens": 27374171.0, "reward": 1.6812111139297485, "reward_std": 0.45339512825012207, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.5976362824440002, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6428329944610596, "step": 101 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5140845070422535, "calib/avg_num_step_conf": 4.9765625, "calib/ece": 0.26726562500000006, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0002816901408453365, "calib/mean_conf": 0.9899218750000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9897183098591547, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26726562500000006, "calib/std_conf": 0.0008804240366863011, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 981.0, "completions/max_terminated_length": 981.0, "completions/mean_length": 532.2734375, "completions/mean_terminated_length": 540.7222290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 304.0, "epoch": 0.1088, "grad_norm": 0.022051550447940826, "learning_rate": 2.7222222222222224e-06, "loss": -0.0047, "num_tokens": 27617129.0, "reward": 1.951385259628296, "reward_std": 0.3563922047615051, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.724351167678833, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7530649900436401, "step": 102 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5048543689320388, "calib/avg_num_step_conf": 4.7890625, "calib/ece": 0.39388235294117646, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 9.708737864111416e-05, "calib/mean_conf": 0.9899607843137255, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.989902912621359, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.39388235294117646, "calib/std_conf": 0.0006249951941376175, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1021.0, "completions/max_terminated_length": 1021.0, "completions/mean_length": 568.23828125, "completions/mean_terminated_length": 577.2579956054688, "completions/min_length": 0.0, "completions/min_terminated_length": 251.0, "epoch": 0.10986666666666667, "grad_norm": 0.013331396505236626, "learning_rate": 2.6944444444444444e-06, "loss": -0.024, "num_tokens": 27867150.0, "reward": 1.6928791999816895, "reward_std": 0.3353043496608734, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5938847064971924, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6463817358016968, "step": 103 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5004629629629629, "calib/avg_num_step_conf": 4.87109375, "calib/ece": 0.4602745098039216, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.7037037036835585e-05, "calib/mean_conf": 0.989686274509804, "calib/mu_c": 0.9897037037037036, "calib/mu_w": 0.9896666666666668, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4602745098039216, "calib/std_conf": 0.003528540197396707, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1184.0, "completions/max_terminated_length": 1184.0, "completions/mean_length": 550.453125, "completions/mean_terminated_length": 559.1904907226562, "completions/min_length": 0.0, "completions/min_terminated_length": 307.0, "epoch": 0.11093333333333333, "grad_norm": 0.019555984064936638, "learning_rate": 2.666666666666667e-06, "loss": -0.0098, "num_tokens": 28114746.0, "reward": 1.5784821510314941, "reward_std": 0.47673073410987854, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5368350744247437, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6286554932594299, "step": 104 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.73828125, "calib/ece": 0.3337500000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.3337500000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1196.0, "completions/max_terminated_length": 1196.0, "completions/mean_length": 579.578125, "completions/mean_terminated_length": 588.77783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 334.0, "epoch": 0.112, "grad_norm": 0.01764088310301304, "learning_rate": 2.6388888888888893e-06, "loss": -0.0063, "num_tokens": 28368878.0, "reward": 1.8065965175628662, "reward_std": 0.5221689343452454, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6587303876876831, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6770305633544922, "step": 105 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.859375, "calib/ece": 0.32725490196078433, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.3306690738754696e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32725490196078433, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 970.0, "completions/max_terminated_length": 970.0, "completions/mean_length": 557.8984375, "completions/mean_terminated_length": 566.7540283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 351.0, "epoch": 0.11306666666666666, "grad_norm": 0.030452396720647812, "learning_rate": 2.6111111111111113e-06, "loss": -0.0206, "num_tokens": 28616284.0, "reward": 1.8300036191940308, "reward_std": 0.255470335483551, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.6628695130348206, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7118322849273682, "step": 106 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.82421875, "calib/ece": 0.3390196078431372, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3390196078431372, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 959.0, "completions/max_terminated_length": 959.0, "completions/mean_length": 577.65625, "completions/mean_terminated_length": 586.825439453125, "completions/min_length": 0.0, "completions/min_terminated_length": 302.0, "epoch": 0.11413333333333334, "grad_norm": 0.01888842135667801, "learning_rate": 2.5833333333333337e-06, "loss": -0.0032, "num_tokens": 28868780.0, "reward": 1.8138813972473145, "reward_std": 0.4561561942100525, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6551355123519897, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7332028150558472, "step": 107 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5075757575757576, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.24765625000000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006060606060603879, "calib/mean_conf": 0.9898437500000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9893939393939394, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.24765625000000013, "calib/std_conf": 0.0024951124097923947, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1136.0, "completions/max_terminated_length": 1136.0, "completions/mean_length": 606.58203125, "completions/mean_terminated_length": 616.2103271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.1152, "grad_norm": 0.0432201586663723, "learning_rate": 2.5555555555555557e-06, "loss": 0.0091, "num_tokens": 29127297.0, "reward": 1.9844746589660645, "reward_std": 0.38650619983673096, "rewards/accuracy_reward_step": 0.7421875, "rewards/final_brier_reward_step": 0.7396574020385742, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7685538530349731, "step": 108 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5042016806722689, "calib/avg_num_step_conf": 4.3359375, "calib/ece": 0.4583464566929134, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00033613445378166684, "calib/mean_conf": 0.9898425196850394, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9896638655462182, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4583464566929134, "calib/std_conf": 0.0025048777512735247, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2622.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 608.46875, "completions/mean_terminated_length": 618.1270141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.11626666666666667, "grad_norm": 0.027083788067102432, "learning_rate": 2.5277777777777778e-06, "loss": -0.0143, "num_tokens": 29387665.0, "reward": 1.5690433979034424, "reward_std": 0.3326517939567566, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5328608751296997, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6105002164840698, "step": 109 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.2578125, "calib/ece": 0.2634375000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2634375000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1095.0, "completions/max_terminated_length": 1095.0, "completions/mean_length": 558.4375, "completions/mean_terminated_length": 567.3016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.11733333333333333, "grad_norm": 0.02330888621509075, "learning_rate": 2.5e-06, "loss": -0.0048, "num_tokens": 29635545.0, "reward": 1.9650754928588867, "reward_std": 0.2993090748786926, "rewards/accuracy_reward_step": 0.7265625, "rewards/final_brier_reward_step": 0.7280253171920776, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7807142734527588, "step": 110 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 3.63671875, "calib/ece": 0.2868750000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2868750000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1105.0, "completions/max_terminated_length": 1105.0, "completions/mean_length": 582.09765625, "completions/mean_terminated_length": 591.3373413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1184, "grad_norm": 0.022671138867735863, "learning_rate": 2.4722222222222226e-06, "loss": 0.0106, "num_tokens": 29891970.0, "reward": 1.9075825214385986, "reward_std": 0.394694060087204, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7009952664375305, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7418348789215088, "step": 111 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5052083333333333, "calib/avg_num_step_conf": 2.2734375, "calib/ece": 0.36779527559055125, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00041666666666639873, "calib/mean_conf": 0.9898425196850394, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9895833333333334, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.36779527559055125, "calib/std_conf": 0.0025048777512735247, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1046.0, "completions/max_terminated_length": 1046.0, "completions/mean_length": 521.0546875, "completions/mean_terminated_length": 527.2332153320312, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.11946666666666667, "grad_norm": 0.021626276895403862, "learning_rate": 2.4444444444444447e-06, "loss": 0.0012, "num_tokens": 30133280.0, "reward": 1.7234654426574707, "reward_std": 0.3433002233505249, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6124820113182068, "rewards/format_reward_step": 0.9609375, "rewards/stepwise_brier_reward": 0.6563798189163208, "step": 112 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.49609375, "calib/ece": 0.3381781376518217, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9899999999999999, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3381781376518217, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1766.0, "completions/max_terminated_length": 1766.0, "completions/mean_length": 424.765625, "completions/mean_terminated_length": 431.5079650878906, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.12053333333333334, "grad_norm": 0.03900357708334923, "learning_rate": 2.4166666666666667e-06, "loss": 0.0037, "num_tokens": 30347220.0, "reward": 1.7606676816940308, "reward_std": 0.4159401059150696, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6276389956474304, "rewards/format_reward_step": 0.953125, "rewards/stepwise_brier_reward": 0.6650315523147583, "step": 113 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5073529411764706, "calib/avg_num_step_conf": 1.91796875, "calib/ece": 0.2607569721115538, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005882352941175562, "calib/mean_conf": 0.9898406374501992, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9894117647058822, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2607569721115538, "calib/std_conf": 0.0025197431555126553, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 999.0, "completions/max_terminated_length": 999.0, "completions/mean_length": 442.65625, "completions/mean_terminated_length": 449.68255615234375, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1216, "grad_norm": 0.02818751521408558, "learning_rate": 2.388888888888889e-06, "loss": -0.0079, "num_tokens": 30565564.0, "reward": 1.9415286779403687, "reward_std": 0.31948792934417725, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.7203612923622131, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7020031809806824, "step": 114 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 1.98828125, "calib/ece": 0.30496062992125983, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30496062992125983, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 812.0, "completions/max_terminated_length": 812.0, "completions/mean_length": 482.14453125, "completions/mean_terminated_length": 489.7976379394531, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.12266666666666666, "grad_norm": 0.018928449600934982, "learning_rate": 2.361111111111111e-06, "loss": -0.0046, "num_tokens": 30794257.0, "reward": 1.8706040382385254, "reward_std": 0.28297024965286255, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6818546652793884, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.706811249256134, "step": 115 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 2.19921875, "calib/ece": 0.27740157480314964, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.27740157480314964, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 551.5390625, "completions/mean_terminated_length": 562.5259399414062, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.12373333333333333, "grad_norm": 0.026971513405442238, "learning_rate": 2.3333333333333336e-06, "loss": -0.0107, "num_tokens": 31039971.0, "reward": 1.9170682430267334, "reward_std": 0.36526399850845337, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7047457098960876, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7604023218154907, "step": 116 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 2.35546875, "calib/ece": 0.4252941176470588, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.4252941176470588, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2653.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 537.69140625, "completions/mean_terminated_length": 544.0671997070312, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.1248, "grad_norm": 0.020982950925827026, "learning_rate": 2.305555555555556e-06, "loss": -0.0025, "num_tokens": 31284220.0, "reward": 1.643857717514038, "reward_std": 0.2697311341762543, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5707613229751587, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6687314510345459, "step": 117 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 3.91015625, "calib/ece": 0.3493750000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3493750000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1261.0, "completions/max_terminated_length": 1261.0, "completions/mean_length": 660.16796875, "completions/mean_terminated_length": 670.6468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 305.0, "epoch": 0.12586666666666665, "grad_norm": 0.018190357834100723, "learning_rate": 2.277777777777778e-06, "loss": 0.0059, "num_tokens": 31557231.0, "reward": 1.787449598312378, "reward_std": 0.31239908933639526, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.6437289118766785, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6779447793960571, "step": 118 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5120481927710843, "calib/avg_num_step_conf": 4.29296875, "calib/ece": 0.3151764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0009638554216867545, "calib/mean_conf": 0.989686274509804, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9890361445783131, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3151764705882353, "calib/std_conf": 0.0035285401973967068, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 721.1015625, "completions/mean_terminated_length": 729.6522216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 369.0, "epoch": 0.12693333333333334, "grad_norm": 0.016771312803030014, "learning_rate": 2.25e-06, "loss": 0.0116, "num_tokens": 31846897.0, "reward": 1.8449645042419434, "reward_std": 0.3058851957321167, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.6744238138198853, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.7132465839385986, "step": 119 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5063291139240507, "calib/avg_num_step_conf": 4.79296875, "calib/ece": 0.2963137254901961, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.011265822784810697, "calib/mean_conf": 0.9865098039215686, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9787341772151894, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2963137254901961, "calib/std_conf": 0.05562457227824789, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 716.73828125, "completions/mean_terminated_length": 728.1151123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 375.0, "epoch": 0.128, "grad_norm": 0.015937916934490204, "learning_rate": 2.222222222222222e-06, "loss": -0.0076, "num_tokens": 32137070.0, "reward": 1.8887319564819336, "reward_std": 0.39160123467445374, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6934167742729187, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7599482536315918, "step": 120 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.9296875, "calib/ece": 0.3142187500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3142187500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1378.0, "completions/max_terminated_length": 1378.0, "completions/mean_length": 765.953125, "completions/mean_terminated_length": 778.1111450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 406.0, "epoch": 0.12906666666666666, "grad_norm": 0.014766894280910492, "learning_rate": 2.1944444444444445e-06, "loss": -0.0073, "num_tokens": 32438210.0, "reward": 1.8706910610198975, "reward_std": 0.38398224115371704, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6821656227111816, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7459110021591187, "step": 121 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.8203125, "calib/ece": 0.26952755905511805, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.26952755905511805, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1984.0, "completions/max_terminated_length": 1984.0, "completions/mean_length": 771.515625, "completions/mean_terminated_length": 786.8844604492188, "completions/min_length": 0.0, "completions/min_terminated_length": 441.0, "epoch": 0.13013333333333332, "grad_norm": 0.016249533742666245, "learning_rate": 2.166666666666667e-06, "loss": -0.0174, "num_tokens": 32743062.0, "reward": 1.937490701675415, "reward_std": 0.2781732380390167, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7201359272003174, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7720146179199219, "step": 122 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 4.98828125, "calib/ece": 0.3154901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3154901960784313, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1635.0, "completions/max_terminated_length": 1635.0, "completions/mean_length": 782.2265625, "completions/mean_terminated_length": 794.6428833007812, "completions/min_length": 0.0, "completions/min_terminated_length": 414.0, "epoch": 0.1312, "grad_norm": 0.015606064349412918, "learning_rate": 2.138888888888889e-06, "loss": -0.0121, "num_tokens": 33048600.0, "reward": 1.850453495979309, "reward_std": 0.41477206349372864, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.6782597303390503, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7001171112060547, "step": 123 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 5.00390625, "calib/ece": 0.2537795275590551, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2537795275590551, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 758.68359375, "completions/mean_terminated_length": 767.6798706054688, "completions/min_length": 0.0, "completions/min_terminated_length": 386.0, "epoch": 0.13226666666666667, "grad_norm": 0.01349243987351656, "learning_rate": 2.1111111111111114e-06, "loss": -0.0228, "num_tokens": 33349639.0, "reward": 1.9734143018722534, "reward_std": 0.2765697240829468, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.7355260848999023, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.798755943775177, "step": 124 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 5.484375, "calib/ece": 0.4079687500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4079687500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1572.0, "completions/max_terminated_length": 1572.0, "completions/mean_length": 790.0234375, "completions/mean_terminated_length": 802.5635375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 409.0, "epoch": 0.13333333333333333, "grad_norm": 0.01633782498538494, "learning_rate": 2.0833333333333334e-06, "loss": 0.0061, "num_tokens": 33656693.0, "reward": 1.6857736110687256, "reward_std": 0.39842092990875244, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5902905464172363, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6606166362762451, "step": 125 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5053763440860215, "calib/avg_num_step_conf": 5.95703125, "calib/ece": 0.35598425196850403, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00043010752688177334, "calib/mean_conf": 0.9898425196850394, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9895698924731181, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.35598425196850403, "calib/std_conf": 0.0025048777512735247, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1829.0, "completions/max_terminated_length": 1829.0, "completions/mean_length": 814.25390625, "completions/mean_terminated_length": 827.1785888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 425.0, "epoch": 0.1344, "grad_norm": 0.014358713291585445, "learning_rate": 2.0555555555555555e-06, "loss": -0.0459, "num_tokens": 33970606.0, "reward": 1.7619569301605225, "reward_std": 0.3340670168399811, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6362203359603882, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6694203615188599, "step": 126 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 5.66015625, "calib/ece": 0.3987301587301588, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.3306690738754696e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3987301587301588, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2625.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 825.26171875, "completions/mean_terminated_length": 835.0474853515625, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.13546666666666668, "grad_norm": 0.016708409413695335, "learning_rate": 2.027777777777778e-06, "loss": 0.0111, "num_tokens": 34285545.0, "reward": 1.671630620956421, "reward_std": 0.4748235046863556, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5899796485900879, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6356050968170166, "step": 127 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49832301341589263, "calib/avg_num_step_conf": 6.03515625, "calib/ece": 0.39169291338582685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9921259842519685, "calib/gap": 2.0639834881541752e-05, "calib/mean_conf": 0.9881496062992127, "calib/mu_c": 0.988157894736842, "calib/mu_w": 0.9881372549019605, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39070866141732286, "calib/std_conf": 0.01928141316257154, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 827.19140625, "completions/mean_terminated_length": 840.3214721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 413.0, "epoch": 0.13653333333333334, "grad_norm": 0.0164869986474514, "learning_rate": 2.0000000000000003e-06, "loss": -0.017, "num_tokens": 34603970.0, "reward": 1.705299973487854, "reward_std": 0.37409543991088867, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6026171445846558, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6795204281806946, "step": 128 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5067567567567568, "calib/avg_num_step_conf": 7.16796875, "calib/ece": 0.28233201581027667, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.000540540540540424, "calib/mean_conf": 0.9898418972332016, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9894594594594595, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28233201581027667, "calib/std_conf": 0.00250980361523914, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1890.0, "completions/max_terminated_length": 1890.0, "completions/mean_length": 822.20703125, "completions/mean_terminated_length": 838.585693359375, "completions/min_length": 0.0, "completions/min_terminated_length": 547.0, "epoch": 0.1376, "grad_norm": 0.01689060591161251, "learning_rate": 1.9722222222222224e-06, "loss": -0.0346, "num_tokens": 34916839.0, "reward": 1.9011118412017822, "reward_std": 0.37755003571510315, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7051265239715576, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7352585196495056, "step": 129 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5304878048780488, "calib/avg_num_step_conf": 8.2421875, "calib/ece": 0.32387755102040816, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0024390243902440156, "calib/mean_conf": 0.9891836734693877, "calib/mu_c": 0.99, "calib/mu_w": 0.987560975609756, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.32387755102040816, "calib/std_conf": 0.00565567610634736, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 890.4375, "completions/mean_terminated_length": 926.6340942382812, "completions/min_length": 0.0, "completions/min_terminated_length": 553.0, "epoch": 0.13866666666666666, "grad_norm": 0.011856338940560818, "learning_rate": 1.944444444444445e-06, "loss": -0.0684, "num_tokens": 35250079.0, "reward": 1.772405743598938, "reward_std": 0.3086838722229004, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.6405613422393799, "rewards/format_reward_step": 0.94921875, "rewards/stepwise_brier_reward": 0.7303117513656616, "step": 130 }, { "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.5043103448275862, "calib/avg_num_step_conf": 10.23828125, "calib/ece": 0.4813559322033898, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0003448275862071304, "calib/mean_conf": 0.9898305084745762, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.989655172413793, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.4813559322033898, "calib/std_conf": 0.002598255884195915, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2647.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 1048.74609375, "completions/mean_terminated_length": 1128.0631103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 619.0, "epoch": 0.13973333333333332, "grad_norm": 0.012333076447248459, "learning_rate": 1.916666666666667e-06, "loss": -0.0836, "num_tokens": 35624766.0, "reward": 1.3999123573303223, "reward_std": 0.5030518770217896, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.4699007570743561, "rewards/format_reward_step": 0.8984375, "rewards/stepwise_brier_reward": 0.5203734040260315, "step": 131 }, { "calib/answer_extract_rate": 0.71875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 14.734375, "calib/ece": 0.23193548387096763, "calib/final_conf_rate": 0.7265625, "calib/format_rate": 0.71484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -4.440892098500626e-16, "calib/mean_conf": 0.9899999999999999, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9900000000000002, "calib/nonempty_final_conf_rate": 0.7265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23193548387096763, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.24609375, "completions/max_length": 2778.0, "completions/max_terminated_length": 2778.0, "completions/mean_length": 989.8671875, "completions/mean_terminated_length": 1312.984375, "completions/min_length": 0.0, "completions/min_terminated_length": 750.0, "epoch": 0.1408, "grad_norm": 0.01676846109330654, "learning_rate": 1.888888888888889e-06, "loss": -0.2793, "num_tokens": 35983764.0, "reward": 1.461935043334961, "reward_std": 0.9555952548980713, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5501629114151001, "rewards/format_reward_step": 0.71484375, "rewards/stepwise_brier_reward": 0.5632020235061646, "step": 132 }, { "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.5052631578947369, "calib/avg_num_step_conf": 11.91796875, "calib/ece": 0.40052401746724886, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.9956331877729258, "calib/gap": 0.01042105263157933, "calib/mean_conf": 0.9856768558951965, "calib/mu_c": 0.99, "calib/mu_w": 0.9795789473684207, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.40052401746724886, "calib/std_conf": 0.0652780444621663, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 1199.4765625, "completions/mean_terminated_length": 1335.0694580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 832.0, "epoch": 0.14186666666666667, "grad_norm": 0.011118494905531406, "learning_rate": 1.8611111111111113e-06, "loss": -0.1199, "num_tokens": 36397174.0, "reward": 1.5080835819244385, "reward_std": 0.6873135566711426, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.53452068567276, "rewards/format_reward_step": 0.890625, "rewards/stepwise_brier_reward": 0.575938880443573, "step": 133 }, { "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.5, "calib/avg_num_step_conf": 10.70703125, "calib/ece": 0.40975308641975305, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.40975308641975305, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2778.0, "completions/max_terminated_length": 2778.0, "completions/mean_length": 1273.45703125, "completions/mean_terminated_length": 1325.2235107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 609.0, "epoch": 0.14293333333333333, "grad_norm": 0.009259031154215336, "learning_rate": 1.8333333333333333e-06, "loss": -0.0652, "num_tokens": 36832131.0, "reward": 1.5892717838287354, "reward_std": 0.6638240814208984, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5584218502044678, "rewards/format_reward_step": 0.9375, "rewards/stepwise_brier_reward": 0.6189777851104736, "step": 134 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5, "calib/avg_num_step_conf": 10.3046875, "calib/ece": 0.2927888446215139, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2927888446215139, "calib/std_conf": 0.0, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2527.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 1229.83984375, "completions/mean_terminated_length": 1259.3560791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 763.0, "epoch": 0.144, "grad_norm": 0.009267253801226616, "learning_rate": 1.8055555555555557e-06, "loss": -0.0236, "num_tokens": 37252850.0, "reward": 1.8674653768539429, "reward_std": 0.46042320132255554, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.6893554329872131, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.7258185148239136, "step": 135 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4917748917748918, "calib/avg_num_step_conf": 9.94921875, "calib/ece": 0.413266129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0003749583749584895, "calib/mean_conf": 0.9898790322580645, "calib/mu_c": 0.9897202797202795, "calib/mu_w": 0.990095238095238, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.413266129032258, "calib/std_conf": 0.002615378656158313, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2607.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 1216.07421875, "completions/mean_terminated_length": 1250.260986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 747.0, "epoch": 0.14506666666666668, "grad_norm": 0.011020061559975147, "learning_rate": 1.777777777777778e-06, "loss": -0.0074, "num_tokens": 37672653.0, "reward": 1.6056492328643799, "reward_std": 0.4844512939453125, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5663796663284302, "rewards/format_reward_step": 0.95703125, "rewards/stepwise_brier_reward": 0.590592622756958, "step": 136 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 9.38671875, "calib/ece": 0.29241935483870973, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.29241935483870973, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2941.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 1233.16796875, "completions/mean_terminated_length": 1272.947509765625, "completions/min_length": 0.0, "completions/min_terminated_length": 798.0, "epoch": 0.14613333333333334, "grad_norm": 0.007238389924168587, "learning_rate": 1.75e-06, "loss": -0.0266, "num_tokens": 38095328.0, "reward": 1.8489694595336914, "reward_std": 0.34204357862472534, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6814659833908081, "rewards/format_reward_step": 0.96484375, "rewards/stepwise_brier_reward": 0.7300364375114441, "step": 137 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 8.7421875, "calib/ece": 0.35862745098039217, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.35862745098039217, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2713.0, "completions/max_terminated_length": 2713.0, "completions/mean_length": 1092.24609375, "completions/mean_terminated_length": 1105.1976318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 705.0, "epoch": 0.1472, "grad_norm": 0.010518020950257778, "learning_rate": 1.7222222222222224e-06, "loss": 0.0063, "num_tokens": 38479279.0, "reward": 1.7698776721954346, "reward_std": 0.3144071698188782, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.6321667432785034, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6973439455032349, "step": 138 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5028735632183908, "calib/avg_num_step_conf": 7.91015625, "calib/ece": 0.30499999999999994, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.747126436772554e-05, "calib/mean_conf": 0.9900393700787401, "calib/mu_c": 0.9900574712643677, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30499999999999994, "calib/std_conf": 0.0006262194378183812, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1907.0, "completions/max_terminated_length": 1907.0, "completions/mean_length": 956.609375, "completions/mean_terminated_length": 975.6653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 707.0, "epoch": 0.14826666666666666, "grad_norm": 0.010206053964793682, "learning_rate": 1.6944444444444446e-06, "loss": -0.0121, "num_tokens": 38827267.0, "reward": 1.8824926614761353, "reward_std": 0.32546108961105347, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.6858386993408203, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.781632125377655, "step": 139 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5076923076923077, "calib/avg_num_step_conf": 7.37109375, "calib/ece": 0.24675889328063239, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006153846153845732, "calib/mean_conf": 0.9898418972332016, "calib/mu_c": 0.99, "calib/mu_w": 0.9893846153846154, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24675889328063239, "calib/std_conf": 0.00250980361523914, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2020.0, "completions/max_terminated_length": 2020.0, "completions/mean_length": 963.3359375, "completions/mean_terminated_length": 982.5259399414062, "completions/min_length": 0.0, "completions/min_terminated_length": 637.0, "epoch": 0.14933333333333335, "grad_norm": 0.010636700317263603, "learning_rate": 1.6666666666666667e-06, "loss": -0.0251, "num_tokens": 39178897.0, "reward": 1.9774314165115356, "reward_std": 0.34751075506210327, "rewards/accuracy_reward_step": 0.734375, "rewards/final_brier_reward_step": 0.7396574020385742, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7872557044029236, "step": 140 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 7.05859375, "calib/ece": 0.2634375000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2634375000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1658.0, "completions/max_terminated_length": 1658.0, "completions/mean_length": 953.55078125, "completions/mean_terminated_length": 968.6865844726562, "completions/min_length": 0.0, "completions/min_terminated_length": 602.0, "epoch": 0.1504, "grad_norm": 0.008473373018205166, "learning_rate": 1.638888888888889e-06, "loss": 0.0128, "num_tokens": 39530102.0, "reward": 1.9683520793914795, "reward_std": 0.2899768352508545, "rewards/accuracy_reward_step": 0.7265625, "rewards/final_brier_reward_step": 0.731931209564209, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.782101571559906, "step": 141 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5052083333333333, "calib/avg_num_step_conf": 6.81640625, "calib/ece": 0.3707936507936509, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0004166666666666208, "calib/mean_conf": 0.98984126984127, "calib/mu_c": 0.99, "calib/mu_w": 0.9895833333333334, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3707936507936509, "calib/std_conf": 0.0025147586536118844, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1816.0, "completions/max_terminated_length": 1816.0, "completions/mean_length": 939.69140625, "completions/mean_terminated_length": 958.410400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 600.0, "epoch": 0.15146666666666667, "grad_norm": 0.011722843162715435, "learning_rate": 1.6111111111111113e-06, "loss": -0.0243, "num_tokens": 39875823.0, "reward": 1.7259165048599243, "reward_std": 0.4010692834854126, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6166988015174866, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.6697796583175659, "step": 142 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.80859375, "calib/ece": 0.3181250000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3181250000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2091.0, "completions/max_terminated_length": 2091.0, "completions/mean_length": 901.0390625, "completions/mean_terminated_length": 915.34130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 564.0, "epoch": 0.15253333333333333, "grad_norm": 0.010479445569217205, "learning_rate": 1.5833333333333333e-06, "loss": -0.0031, "num_tokens": 40213825.0, "reward": 1.8537890911102295, "reward_std": 0.26736265420913696, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.6783374547958374, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7055689096450806, "step": 143 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.6953125, "calib/ece": 0.21745098039215682, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21745098039215682, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1860.0, "completions/max_terminated_length": 1860.0, "completions/mean_length": 885.05078125, "completions/mean_terminated_length": 899.0992431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 520.0, "epoch": 0.1536, "grad_norm": 0.010390263050794601, "learning_rate": 1.5555555555555558e-06, "loss": -0.0118, "num_tokens": 40544526.0, "reward": 2.0450034141540527, "reward_std": 0.3080452084541321, "rewards/accuracy_reward_step": 0.76953125, "rewards/final_brier_reward_step": 0.7738851308822632, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8045663833618164, "step": 144 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5149253731343284, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.2524313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0011940298507464586, "calib/mean_conf": 0.989686274509804, "calib/mu_c": 0.99, "calib/mu_w": 0.9888059701492535, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2524313725490196, "calib/std_conf": 0.0035285401973967063, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1245.0, "completions/max_terminated_length": 1245.0, "completions/mean_length": 838.34765625, "completions/mean_terminated_length": 851.65478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 519.0, "epoch": 0.15466666666666667, "grad_norm": 0.010068155825138092, "learning_rate": 1.527777777777778e-06, "loss": -0.0184, "num_tokens": 40861847.0, "reward": 1.9680100679397583, "reward_std": 0.4435754418373108, "rewards/accuracy_reward_step": 0.734375, "rewards/final_brier_reward_step": 0.7401160001754761, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7334867715835571, "step": 145 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5046728971962617, "calib/avg_num_step_conf": 6.50390625, "calib/ece": 0.4078515625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0002803738317758153, "calib/mean_conf": 0.9898828125000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9897196261682242, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4078515625000001, "calib/std_conf": 0.0018713343073442964, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1567.0, "completions/max_terminated_length": 1567.0, "completions/mean_length": 868.91796875, "completions/mean_terminated_length": 882.7103881835938, "completions/min_length": 0.0, "completions/min_terminated_length": 523.0, "epoch": 0.15573333333333333, "grad_norm": 0.011212436482310295, "learning_rate": 1.5e-06, "loss": 0.0043, "num_tokens": 41191506.0, "reward": 1.6800085306167603, "reward_std": 0.34910422563552856, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5904414057731628, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6452174782752991, "step": 146 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5141509433962264, "calib/avg_num_step_conf": 6.41796875, "calib/ece": 0.40688976377952757, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0010377358490565314, "calib/mean_conf": 0.9895669291338582, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9889622641509433, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.40688976377952757, "calib/std_conf": 0.003994268632473766, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 832.86328125, "completions/mean_terminated_length": 849.4542236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 453.0, "epoch": 0.1568, "grad_norm": 0.012004495598375797, "learning_rate": 1.4722222222222225e-06, "loss": -0.0085, "num_tokens": 41508399.0, "reward": 1.6718380451202393, "reward_std": 0.44913479685783386, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5870640277862549, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6549757719039917, "step": 147 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5081967213114754, "calib/avg_num_step_conf": 6.41015625, "calib/ece": 0.22812500000000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0006557377049178914, "calib/mean_conf": 0.9898437500000001, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9893442622950822, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22812500000000013, "calib/std_conf": 0.0024951124097923947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1685.0, "completions/max_terminated_length": 1685.0, "completions/mean_length": 822.83984375, "completions/mean_terminated_length": 835.9008178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 342.0, "epoch": 0.15786666666666666, "grad_norm": 0.011262796819210052, "learning_rate": 1.4444444444444445e-06, "loss": 0.0045, "num_tokens": 41824158.0, "reward": 2.0360844135284424, "reward_std": 0.37539172172546387, "rewards/accuracy_reward_step": 0.76171875, "rewards/final_brier_reward_step": 0.7666874527931213, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8073375821113586, "step": 148 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.2734375, "calib/ece": 0.3025000000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3025000000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 844.06640625, "completions/mean_terminated_length": 857.46435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 516.0, "epoch": 0.15893333333333334, "grad_norm": 0.012008114717900753, "learning_rate": 1.4166666666666667e-06, "loss": 0.0068, "num_tokens": 42144695.0, "reward": 1.8902229070663452, "reward_std": 0.2819676399230957, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6936500072479248, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7422417998313904, "step": 149 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.4140625, "calib/ece": 0.3154901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3154901960784313, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2325.0, "completions/max_terminated_length": 2325.0, "completions/mean_length": 792.0078125, "completions/mean_terminated_length": 804.5794067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 419.0, "epoch": 0.16, "grad_norm": 0.010181116871535778, "learning_rate": 1.3888888888888892e-06, "loss": -0.0149, "num_tokens": 42452409.0, "reward": 1.8635663986206055, "reward_std": 0.21723566949367523, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.6782597303390503, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7525684833526611, "step": 150 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.3767187500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3767187500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 858.32421875, "completions/mean_terminated_length": 871.948486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 495.0, "epoch": 0.16106666666666666, "grad_norm": 0.010340590961277485, "learning_rate": 1.3611111111111112e-06, "loss": 0.0127, "num_tokens": 42779164.0, "reward": 1.744431972503662, "reward_std": 0.3037966191768646, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6208378672599792, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.685015082359314, "step": 151 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.50625, "calib/avg_num_step_conf": 6.3203125, "calib/ece": 0.3060474308300395, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0004999999999999449, "calib/mean_conf": 0.9898418972332016, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9894999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3060474308300395, "calib/std_conf": 0.0025098036152391397, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1740.0, "completions/max_terminated_length": 1740.0, "completions/mean_length": 831.84375, "completions/mean_terminated_length": 848.4143676757812, "completions/min_length": 0.0, "completions/min_terminated_length": 492.0, "epoch": 0.16213333333333332, "grad_norm": 0.011317038908600807, "learning_rate": 1.3333333333333334e-06, "loss": -0.0204, "num_tokens": 43097508.0, "reward": 1.8634164333343506, "reward_std": 0.41251468658447266, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6822355389595032, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7401804327964783, "step": 152 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5064102564102564, "calib/avg_num_step_conf": 6.18359375, "calib/ece": 0.2921960784313726, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.012051282051282475, "calib/mean_conf": 0.986313725490196, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9779487179487176, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2921960784313726, "calib/std_conf": 0.058749548248935975, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2476.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 858.13671875, "completions/mean_terminated_length": 868.3123168945312, "completions/min_length": 0.0, "completions/min_terminated_length": 486.0, "epoch": 0.1632, "grad_norm": 0.009934207424521446, "learning_rate": 1.3055555555555556e-06, "loss": 0.0058, "num_tokens": 43424511.0, "reward": 1.892085075378418, "reward_std": 0.27568596601486206, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.6972448825836182, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7460954189300537, "step": 153 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.28515625, "calib/ece": 0.39784313725490195, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -4.440892098500626e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999997, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39784313725490195, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1861.0, "completions/max_terminated_length": 1861.0, "completions/mean_length": 825.82421875, "completions/mean_terminated_length": 838.9326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 431.0, "epoch": 0.16426666666666667, "grad_norm": 0.010299614630639553, "learning_rate": 1.2777777777777779e-06, "loss": 0.006, "num_tokens": 43740362.0, "reward": 1.6944944858551025, "reward_std": 0.31959229707717896, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5978691577911377, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6488587856292725, "step": 154 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.24609375, "calib/ece": 0.4056862745098039, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.4056862745098039, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1397.0, "completions/max_terminated_length": 1397.0, "completions/mean_length": 798.93359375, "completions/mean_terminated_length": 811.6151123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 494.0, "epoch": 0.16533333333333333, "grad_norm": 0.011612067930400372, "learning_rate": 1.25e-06, "loss": -0.0111, "num_tokens": 44052105.0, "reward": 1.667478322982788, "reward_std": 0.5500566363334656, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5860737562179565, "rewards/format_reward_step": 0.98046875, "rewards/stepwise_brier_reward": 0.630714476108551, "step": 155 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5164835164835164, "calib/avg_num_step_conf": 6.16796875, "calib/ece": 0.3448437500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.0017582417582415744, "calib/mean_conf": 0.9893750000000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9882417582417582, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3448437500000001, "calib/std_conf": 0.006404344228724747, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1703.0, "completions/max_terminated_length": 1703.0, "completions/mean_length": 809.5703125, "completions/mean_terminated_length": 822.420654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 475.0, "epoch": 0.1664, "grad_norm": 0.015950413420796394, "learning_rate": 1.2222222222222223e-06, "loss": 0.0128, "num_tokens": 44364115.0, "reward": 1.815731406211853, "reward_std": 0.31029415130615234, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.6527366638183594, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7430015802383423, "step": 156 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5096153846153846, "calib/avg_num_step_conf": 6.2421875, "calib/ece": 0.1900392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.996078431372549, "calib/gap": 0.019038461538461338, "calib/mean_conf": 0.9861176470588234, "calib/mu_c": 0.99, "calib/mu_w": 0.9709615384615387, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1900392156862745, "calib/std_conf": 0.06187452421962405, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1220.0, "completions/max_terminated_length": 1220.0, "completions/mean_length": 780.29296875, "completions/mean_terminated_length": 792.6785888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 435.0, "epoch": 0.16746666666666668, "grad_norm": 0.009241663850843906, "learning_rate": 1.1944444444444446e-06, "loss": -0.0087, "num_tokens": 44667598.0, "reward": 2.0960278511047363, "reward_std": 0.348201185464859, "rewards/accuracy_reward_step": 0.79296875, "rewards/final_brier_reward_step": 0.8007601499557495, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8333513140678406, "step": 157 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.359375, "calib/ece": 0.2829687500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2829687500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1522.0, "completions/max_terminated_length": 1522.0, "completions/mean_length": 772.57421875, "completions/mean_terminated_length": 784.8373413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 467.0, "epoch": 0.16853333333333334, "grad_norm": 0.011688906699419022, "learning_rate": 1.1666666666666668e-06, "loss": -0.0188, "num_tokens": 44970617.0, "reward": 1.9274451732635498, "reward_std": 0.3046892285346985, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7088847160339355, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7665203809738159, "step": 158 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5172413793103448, "calib/avg_num_step_conf": 6.3203125, "calib/ece": 0.3294921875000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.001034482758620947, "calib/mean_conf": 0.9896484375000001, "calib/mu_c": 0.99, "calib/mu_w": 0.988965517241379, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3294921875000001, "calib/std_conf": 0.0035730979287718623, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1249.0, "completions/max_terminated_length": 1249.0, "completions/mean_length": 777.06640625, "completions/mean_terminated_length": 789.4008178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 516.0, "epoch": 0.1696, "grad_norm": 0.008924233727157116, "learning_rate": 1.138888888888889e-06, "loss": 0.0063, "num_tokens": 45274330.0, "reward": 1.843568205833435, "reward_std": 0.34368807077407837, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.6674585342407227, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7536893486976624, "step": 159 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.496875, "calib/avg_num_step_conf": 6.12109375, "calib/ece": 0.36239215686274506, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0002499999999995284, "calib/mean_conf": 0.9898431372549019, "calib/mu_c": 0.9897500000000001, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.36239215686274506, "calib/std_conf": 0.002499980776550469, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 804.47265625, "completions/mean_terminated_length": 817.2421264648438, "completions/min_length": 0.0, "completions/min_terminated_length": 514.0, "epoch": 0.17066666666666666, "grad_norm": 0.01021601166576147, "learning_rate": 1.111111111111111e-06, "loss": -0.0102, "num_tokens": 45585115.0, "reward": 1.7612160444259644, "reward_std": 0.4553215503692627, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6283385753631592, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6899632215499878, "step": 160 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5087719298245614, "calib/avg_num_step_conf": 6.23046875, "calib/ece": 0.21250000000000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.000701754385964759, "calib/mean_conf": 0.9898437500000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9892982456140352, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21250000000000013, "calib/std_conf": 0.0024951124097923947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1316.0, "completions/max_terminated_length": 1316.0, "completions/mean_length": 775.4921875, "completions/mean_terminated_length": 787.8016357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 421.0, "epoch": 0.17173333333333332, "grad_norm": 0.009294044226408005, "learning_rate": 1.0833333333333335e-06, "loss": -0.0073, "num_tokens": 45887561.0, "reward": 2.063585042953491, "reward_std": 0.2662416994571686, "rewards/accuracy_reward_step": 0.77734375, "rewards/final_brier_reward_step": 0.781999945640564, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8082783818244934, "step": 161 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5089285714285714, "calib/avg_num_step_conf": 6.26171875, "calib/ece": 0.20859375000000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0007142857142855563, "calib/mean_conf": 0.9898437500000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9892857142857144, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20859375000000013, "calib/std_conf": 0.0024951124097923947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1628.0, "completions/max_terminated_length": 1628.0, "completions/mean_length": 763.7265625, "completions/mean_terminated_length": 775.8492431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 362.0, "epoch": 0.1728, "grad_norm": 0.009800123982131481, "learning_rate": 1.0555555555555557e-06, "loss": -0.0024, "num_tokens": 46187219.0, "reward": 2.075169801712036, "reward_std": 0.22966566681861877, "rewards/accuracy_reward_step": 0.78125, "rewards/final_brier_reward_step": 0.7858281135559082, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.8273511528968811, "step": 162 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4971590909090909, "calib/avg_num_step_conf": 6.11328125, "calib/ece": 0.3024609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -5.681818181801912e-05, "calib/mean_conf": 0.9899609375, "calib/mu_c": 0.989943181818182, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3024609375, "calib/std_conf": 0.0006237781024480987, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1786.0, "completions/max_terminated_length": 1786.0, "completions/mean_length": 793.3515625, "completions/mean_terminated_length": 805.9445190429688, "completions/min_length": 0.0, "completions/min_terminated_length": 461.0, "epoch": 0.17386666666666667, "grad_norm": 0.0091823386028409, "learning_rate": 1.0277777777777777e-06, "loss": -0.0163, "num_tokens": 46495149.0, "reward": 1.889510989189148, "reward_std": 0.3669717311859131, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6935710310935974, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7472852468490601, "step": 163 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5061728395061729, "calib/avg_num_step_conf": 5.8515625, "calib/ece": 0.30625000000000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0004938271604938427, "calib/mean_conf": 0.9898437500000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.989506172839506, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30625000000000013, "calib/std_conf": 0.0024951124097923947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1638.0, "completions/max_terminated_length": 1638.0, "completions/mean_length": 848.86328125, "completions/mean_terminated_length": 862.3373413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 520.0, "epoch": 0.17493333333333333, "grad_norm": 0.009390867315232754, "learning_rate": 1.0000000000000002e-06, "loss": 0.008, "num_tokens": 46818594.0, "reward": 1.882812261581421, "reward_std": 0.35324978828430176, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.6901249885559082, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7395614981651306, "step": 164 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.0859375, "calib/ece": 0.4392187500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4392187500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1340.0, "completions/max_terminated_length": 1340.0, "completions/mean_length": 797.6953125, "completions/mean_terminated_length": 810.357177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 464.0, "epoch": 0.176, "grad_norm": 0.011323746293783188, "learning_rate": 9.722222222222224e-07, "loss": -0.0105, "num_tokens": 47128380.0, "reward": 1.6184314489364624, "reward_std": 0.4666936695575714, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.559587836265564, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.6172629594802856, "step": 165 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5136986301369864, "calib/avg_num_step_conf": 6.14453125, "calib/ece": 0.27484375000000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0010958904109586998, "calib/mean_conf": 0.9896875, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9889041095890411, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.27484375000000005, "calib/std_conf": 0.0035216961467452053, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1693.0, "completions/max_terminated_length": 1693.0, "completions/mean_length": 785.94921875, "completions/mean_terminated_length": 798.4246215820312, "completions/min_length": 0.0, "completions/min_terminated_length": 457.0, "epoch": 0.17706666666666668, "grad_norm": 0.008984563872218132, "learning_rate": 9.444444444444445e-07, "loss": 0.0073, "num_tokens": 47435767.0, "reward": 1.9408130645751953, "reward_std": 0.24893325567245483, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7166886329650879, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7809381484985352, "step": 166 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.078125, "calib/ece": 0.2478125000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -3.3306690738754696e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9900000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2478125000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 840.9609375, "completions/mean_terminated_length": 854.3095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 518.0, "epoch": 0.17813333333333334, "grad_norm": 0.007702964823693037, "learning_rate": 9.166666666666666e-07, "loss": -0.019, "num_tokens": 47756661.0, "reward": 1.9908963441848755, "reward_std": 0.36734330654144287, "rewards/accuracy_reward_step": 0.7421875, "rewards/final_brier_reward_step": 0.7432601451873779, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7828250527381897, "step": 167 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 5.9140625, "calib/ece": 0.22529411764705887, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.9900000000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22529411764705887, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1476.0, "completions/max_terminated_length": 1476.0, "completions/mean_length": 840.86328125, "completions/mean_terminated_length": 854.2103881835938, "completions/min_length": 0.0, "completions/min_terminated_length": 431.0, "epoch": 0.1792, "grad_norm": 0.00855457317084074, "learning_rate": 8.88888888888889e-07, "loss": -0.0072, "num_tokens": 48076594.0, "reward": 2.0264196395874023, "reward_std": 0.34020769596099854, "rewards/accuracy_reward_step": 0.76171875, "rewards/final_brier_reward_step": 0.76240074634552, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7885903120040894, "step": 168 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5028901734104047, "calib/avg_num_step_conf": 6.03515625, "calib/ece": 0.3142578085937502, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 5.779768786129402e-05, "calib/mean_conf": 0.9900390585937502, "calib/mu_c": 0.9900577976878612, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3142578085937502, "calib/std_conf": 0.0006237157246378521, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1404.0, "completions/max_terminated_length": 1404.0, "completions/mean_length": 821.57421875, "completions/mean_terminated_length": 834.6151123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 433.0, "epoch": 0.18026666666666666, "grad_norm": 0.011762641370296478, "learning_rate": 8.611111111111112e-07, "loss": 0.0026, "num_tokens": 48391101.0, "reward": 1.8578014373779297, "reward_std": 0.32131901383399963, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6741988658905029, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7335692644119263, "step": 169 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.08203125, "calib/ece": 0.2595312500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2595312500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 816.5703125, "completions/mean_terminated_length": 829.5317993164062, "completions/min_length": 0.0, "completions/min_terminated_length": 483.0, "epoch": 0.18133333333333335, "grad_norm": 0.009184346534311771, "learning_rate": 8.333333333333333e-07, "loss": -0.006, "num_tokens": 48704295.0, "reward": 1.9696009159088135, "reward_std": 0.23746982216835022, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.7318534851074219, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7715502977371216, "step": 170 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.19921875, "calib/ece": 0.3806250000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3806250000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1453.0, "completions/max_terminated_length": 1453.0, "completions/mean_length": 794.14453125, "completions/mean_terminated_length": 806.7500610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 467.0, "epoch": 0.1824, "grad_norm": 0.010356857441365719, "learning_rate": 8.055555555555557e-07, "loss": 0.0062, "num_tokens": 49014492.0, "reward": 1.7332839965820312, "reward_std": 0.2521524429321289, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6170874834060669, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.6597986221313477, "step": 171 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.2890625, "calib/ece": 0.17503937007874015, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -4.440892098500626e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.17503937007874015, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1651.0, "completions/max_terminated_length": 1651.0, "completions/mean_length": 791.50390625, "completions/mean_terminated_length": 807.2709350585938, "completions/min_length": 0.0, "completions/min_terminated_length": 466.0, "epoch": 0.18346666666666667, "grad_norm": 0.008940366096794605, "learning_rate": 7.777777777777779e-07, "loss": 0.0022, "num_tokens": 49320469.0, "reward": 2.124072551727295, "reward_std": 0.31485238671302795, "rewards/accuracy_reward_step": 0.80859375, "rewards/final_brier_reward_step": 0.8121663928031921, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.8481862545013428, "step": 172 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1875, "calib/ece": 0.2605882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2605882352941177, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2366.0, "completions/max_terminated_length": 2366.0, "completions/mean_length": 822.77734375, "completions/mean_terminated_length": 832.5336303710938, "completions/min_length": 0.0, "completions/min_terminated_length": 391.0, "epoch": 0.18453333333333333, "grad_norm": 0.010574176907539368, "learning_rate": 7.5e-07, "loss": 0.0065, "num_tokens": 49634260.0, "reward": 1.9617128372192383, "reward_std": 0.2606860399246216, "rewards/accuracy_reward_step": 0.7265625, "rewards/final_brier_reward_step": 0.7318534851074219, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7634353041648865, "step": 173 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 5.9375, "calib/ece": 0.34686274509803916, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9899999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.34686274509803916, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2392.0, "completions/max_terminated_length": 2392.0, "completions/mean_length": 833.1171875, "completions/mean_terminated_length": 846.34130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 451.0, "epoch": 0.1856, "grad_norm": 0.014669586904346943, "learning_rate": 7.222222222222222e-07, "loss": -0.0094, "num_tokens": 49951770.0, "reward": 1.7839492559432983, "reward_std": 0.47378671169281006, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.63974529504776, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.6835517287254333, "step": 174 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5004042288557213, "calib/avg_num_step_conf": 6.01171875, "calib/ece": 0.4621653543307087, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -4.8507462686919744e-05, "calib/mean_conf": 0.9897244094488189, "calib/mu_c": 0.9897014925373132, "calib/mu_w": 0.9897500000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.4621653543307087, "calib/std_conf": 0.003125151121648213, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2531.0, "completions/max_terminated_length": 2531.0, "completions/mean_length": 835.82421875, "completions/mean_terminated_length": 852.47412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 431.0, "epoch": 0.18666666666666668, "grad_norm": 0.009562082588672638, "learning_rate": 6.944444444444446e-07, "loss": -0.0133, "num_tokens": 50271565.0, "reward": 1.5616577863693237, "reward_std": 0.43291059136390686, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5285648107528687, "rewards/format_reward_step": 0.9765625, "rewards/stepwise_brier_reward": 0.6243162155151367, "step": 175 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5057471264367817, "calib/avg_num_step_conf": 6.26171875, "calib/ece": 0.33236220472440947, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.00045977011494291453, "calib/mean_conf": 0.9898425196850394, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.989540229885057, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33236220472440947, "calib/std_conf": 0.0025048777512735247, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1715.0, "completions/max_terminated_length": 1715.0, "completions/mean_length": 801.9765625, "completions/mean_terminated_length": 817.9522094726562, "completions/min_length": 0.0, "completions/min_terminated_length": 448.0, "epoch": 0.18773333333333334, "grad_norm": 0.008803504519164562, "learning_rate": 6.666666666666667e-07, "loss": -0.0149, "num_tokens": 50580935.0, "reward": 1.8366894721984863, "reward_std": 0.27378901839256287, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.659344494342804, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7889761924743652, "step": 176 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1875, "calib/ece": 0.3220312500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3220312500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1522.0, "completions/max_terminated_length": 1522.0, "completions/mean_length": 825.61328125, "completions/mean_terminated_length": 838.7183227539062, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.1888, "grad_norm": 0.009638850577175617, "learning_rate": 6.388888888888889e-07, "loss": -0.0124, "num_tokens": 50896124.0, "reward": 1.841766595840454, "reward_std": 0.21446684002876282, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.6704480648040771, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7122435569763184, "step": 177 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5068493150684932, "calib/avg_num_step_conf": 6.26953125, "calib/ece": 0.2750390625000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0004109589041094708, "calib/mean_conf": 0.9898828125000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.9895890410958903, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2750390625000001, "calib/std_conf": 0.001871334307344296, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1559.0, "completions/max_terminated_length": 1559.0, "completions/mean_length": 797.8359375, "completions/mean_terminated_length": 810.5000610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 513.0, "epoch": 0.18986666666666666, "grad_norm": 0.010374956764280796, "learning_rate": 6.111111111111112e-07, "loss": -0.017, "num_tokens": 51206442.0, "reward": 1.940459132194519, "reward_std": 0.40489891171455383, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7206753492355347, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7520986795425415, "step": 178 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4972067039106145, "calib/avg_num_step_conf": 6.171875, "calib/ece": 0.29062500000000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.00022346368715042697, "calib/mean_conf": 0.9898437500000001, "calib/mu_c": 0.9897765363128492, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.29062500000000013, "calib/std_conf": 0.0024951124097923947, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2087.0, "completions/max_terminated_length": 2087.0, "completions/mean_length": 836.5, "completions/mean_terminated_length": 849.77783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 502.0, "epoch": 0.19093333333333334, "grad_norm": 0.01011861115694046, "learning_rate": 5.833333333333334e-07, "loss": 0.0039, "num_tokens": 51526850.0, "reward": 1.904581069946289, "reward_std": 0.34588849544525146, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7012190818786621, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7296052575111389, "step": 179 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 5.9765625, "calib/ece": 0.2517187500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2517187500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1729.0, "completions/max_terminated_length": 1729.0, "completions/mean_length": 885.42578125, "completions/mean_terminated_length": 899.480224609375, "completions/min_length": 0.0, "completions/min_terminated_length": 556.0, "epoch": 0.192, "grad_norm": 0.009084006771445274, "learning_rate": 5.555555555555555e-07, "loss": 0.0067, "num_tokens": 51857375.0, "reward": 1.994532823562622, "reward_std": 0.30369269847869873, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7395097613334656, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.816746711730957, "step": 180 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.28125, "calib/ece": 0.3064062500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3064062500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1398.0, "completions/max_terminated_length": 1398.0, "completions/mean_length": 791.02734375, "completions/mean_terminated_length": 803.5833740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 477.0, "epoch": 0.19306666666666666, "grad_norm": 0.009865384548902512, "learning_rate": 5.277777777777779e-07, "loss": -0.0028, "num_tokens": 52166142.0, "reward": 1.8717740774154663, "reward_std": 0.3827122449874878, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.6897441148757935, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7036018967628479, "step": 181 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5045725800201503, "calib/avg_num_step_conf": 6.1875, "calib/ece": 0.25921875000000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.000365806401612212, "calib/mean_conf": 0.9896875, "calib/mu_c": 0.9897860962566845, "calib/mu_w": 0.9894202898550722, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25921875000000005, "calib/std_conf": 0.003521696146745205, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 813.54296875, "completions/mean_terminated_length": 826.4564208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 544.0, "epoch": 0.19413333333333332, "grad_norm": 0.009119072929024696, "learning_rate": 5.000000000000001e-07, "loss": 0.0038, "num_tokens": 52480569.0, "reward": 1.9650317430496216, "reward_std": 0.25256583094596863, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.732147216796875, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7529795169830322, "step": 182 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.05078125, "calib/ece": 0.2751562500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999998, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2751562500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1558.0, "completions/max_terminated_length": 1558.0, "completions/mean_length": 856.921875, "completions/mean_terminated_length": 870.5238647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 512.0, "epoch": 0.1952, "grad_norm": 0.00926928035914898, "learning_rate": 4.7222222222222226e-07, "loss": -0.0148, "num_tokens": 52806621.0, "reward": 1.9432084560394287, "reward_std": 0.4737989902496338, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7202913761138916, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7791052460670471, "step": 183 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.26450980392156864, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26450980392156864, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1405.0, "completions/max_terminated_length": 1405.0, "completions/mean_length": 836.9921875, "completions/mean_terminated_length": 850.27783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 466.0, "epoch": 0.19626666666666667, "grad_norm": 0.00922873243689537, "learning_rate": 4.444444444444445e-07, "loss": -0.0194, "num_tokens": 53126171.0, "reward": 1.9618688821792603, "reward_std": 0.32835304737091064, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7280253767967224, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7913253307342529, "step": 184 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.25587301587301603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.25587301587301603, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2511.0, "completions/max_terminated_length": 2511.0, "completions/mean_length": 845.546875, "completions/mean_terminated_length": 865.8400268554688, "completions/min_length": 0.0, "completions/min_terminated_length": 516.0, "epoch": 0.19733333333333333, "grad_norm": 0.008712178096175194, "learning_rate": 4.1666666666666667e-07, "loss": -0.0022, "num_tokens": 53449551.0, "reward": 1.9483182430267334, "reward_std": 0.34782806038856506, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7277921438217163, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7607932090759277, "step": 185 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.05078125, "calib/ece": 0.30889763779527557, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.30889763779527557, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2501.0, "completions/max_terminated_length": 2501.0, "completions/mean_length": 864.60546875, "completions/mean_terminated_length": 874.8577270507812, "completions/min_length": 0.0, "completions/min_terminated_length": 462.0, "epoch": 0.1984, "grad_norm": 0.01143097784370184, "learning_rate": 3.8888888888888895e-07, "loss": -0.0059, "num_tokens": 53775930.0, "reward": 1.8627067804336548, "reward_std": 0.23205628991127014, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.6818546652793884, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7455346584320068, "step": 186 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.04296875, "calib/ece": 0.29952380952380964, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.3306690738754696e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.29952380952380964, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2036.0, "completions/max_terminated_length": 2036.0, "completions/mean_length": 849.0625, "completions/mean_terminated_length": 869.4400634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 479.0, "epoch": 0.19946666666666665, "grad_norm": 0.011029952205717564, "learning_rate": 3.611111111111111e-07, "loss": -0.0206, "num_tokens": 54094834.0, "reward": 1.8708256483078003, "reward_std": 0.4722091555595398, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.6856827735900879, "rewards/format_reward_step": 0.984375, "rewards/stepwise_brier_reward": 0.7507448792457581, "step": 187 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.078125, "calib/ece": 0.2331372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -3.3306690738754696e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2331372549019608, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1574.0, "completions/max_terminated_length": 1574.0, "completions/mean_length": 858.39453125, "completions/mean_terminated_length": 872.0198974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 431.0, "epoch": 0.20053333333333334, "grad_norm": 0.00768205476924777, "learning_rate": 3.3333333333333335e-07, "loss": -0.0058, "num_tokens": 54418655.0, "reward": 2.016286849975586, "reward_std": 0.32633286714553833, "rewards/accuracy_reward_step": 0.75390625, "rewards/final_brier_reward_step": 0.7585726380348206, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7987623810768127, "step": 188 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1875, "calib/ece": 0.26450980392156864, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26450980392156864, "calib/std_conf": 0.0, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1578.0, "completions/max_terminated_length": 1578.0, "completions/mean_length": 837.58203125, "completions/mean_terminated_length": 850.8770141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 521.0, "epoch": 0.2016, "grad_norm": 0.009047552943229675, "learning_rate": 3.055555555555556e-07, "loss": -0.0177, "num_tokens": 54740844.0, "reward": 1.9537990093231201, "reward_std": 0.3179192543029785, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7280253767967224, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.7590456008911133, "step": 189 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.506578947368421, "calib/avg_num_step_conf": 6.04296875, "calib/ece": 0.28671875000000013, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005263157894738191, "calib/mean_conf": 0.9898437500000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9894736842105262, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28671875000000013, "calib/std_conf": 0.0024951124097923947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1488.0, "completions/max_terminated_length": 1488.0, "completions/mean_length": 852.01171875, "completions/mean_terminated_length": 865.5357666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 484.0, "epoch": 0.20266666666666666, "grad_norm": 0.010138653218746185, "learning_rate": 2.7777777777777776e-07, "loss": 0.001, "num_tokens": 55064567.0, "reward": 1.9208712577819824, "reward_std": 0.322945773601532, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.7092655897140503, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.755469560623169, "step": 190 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.23828125, "calib/ece": 0.3415625000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3415625000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 828.1328125, "completions/mean_terminated_length": 841.27783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 480.0, "epoch": 0.20373333333333332, "grad_norm": 0.009094194509088993, "learning_rate": 2.5000000000000004e-07, "loss": -0.0019, "num_tokens": 55380737.0, "reward": 1.8121626377105713, "reward_std": 0.21085627377033234, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6553687453269958, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.702657163143158, "step": 191 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.20703125, "calib/ece": 0.1892187500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -2.220446049250313e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000002, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1892187500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1385.0, "completions/max_terminated_length": 1385.0, "completions/mean_length": 800.58984375, "completions/mean_terminated_length": 813.2976684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 433.0, "epoch": 0.2048, "grad_norm": 0.009250716306269169, "learning_rate": 2.2222222222222224e-07, "loss": 0.0208, "num_tokens": 55690664.0, "reward": 2.106654405593872, "reward_std": 0.2364106923341751, "rewards/accuracy_reward_step": 0.80078125, "rewards/final_brier_reward_step": 0.8045878410339355, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8251546621322632, "step": 192 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5073529411764706, "calib/avg_num_step_conf": 6.16796875, "calib/ece": 0.25650980392156864, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0005882352941177782, "calib/mean_conf": 0.9898431372549019, "calib/mu_c": 0.99, "calib/mu_w": 0.9894117647058822, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.25650980392156864, "calib/std_conf": 0.0024999807765504704, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2518.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 855.9375, "completions/mean_terminated_length": 866.0869750976562, "completions/min_length": 0.0, "completions/min_terminated_length": 500.0, "epoch": 0.20586666666666667, "grad_norm": 0.01025775820016861, "learning_rate": 1.9444444444444447e-07, "loss": 0.0057, "num_tokens": 56015496.0, "reward": 1.96535325050354, "reward_std": 0.19145634770393372, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.7359069585800171, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7583186626434326, "step": 193 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.26953125, "calib/ece": 0.2673437500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 3.3306690738754696e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999997, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2673437500000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1313.0, "completions/max_terminated_length": 1313.0, "completions/mean_length": 824.7734375, "completions/mean_terminated_length": 837.8651123046875, "completions/min_length": 0.0, "completions/min_terminated_length": 500.0, "epoch": 0.20693333333333333, "grad_norm": 0.00920251477509737, "learning_rate": 1.6666666666666668e-07, "loss": -0.0116, "num_tokens": 56332582.0, "reward": 1.9541829824447632, "reward_std": 0.23327034711837769, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7279476523399353, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7684719562530518, "step": 194 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5168539325842696, "calib/avg_num_step_conf": 6.0703125, "calib/ece": 0.3349739583333333, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9921875, "calib/gap": 0.007715355805243296, "calib/mean_conf": 0.9873177083333334, "calib/mu_c": 0.9899999999999999, "calib/mu_w": 0.9822846441947566, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3349739583333333, "calib/std_conf": 0.028562366562264282, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1432.0, "completions/max_terminated_length": 1432.0, "completions/mean_length": 839.01171875, "completions/mean_terminated_length": 852.3294067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 477.0, "epoch": 0.208, "grad_norm": 0.009040816687047482, "learning_rate": 1.3888888888888888e-07, "loss": -0.0006, "num_tokens": 56653353.0, "reward": 1.8100706338882446, "reward_std": 0.21309393644332886, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.6631484627723694, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.6865090727806091, "step": 195 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5130681818181818, "calib/avg_num_step_conf": 6.55078125, "calib/ece": 0.30171875000000004, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.0010454545454545716, "calib/mean_conf": 0.98921875, "calib/mu_c": 0.9895454545454547, "calib/mu_w": 0.9885000000000002, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30171875000000004, "calib/std_conf": 0.005535309244974489, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 780.6796875, "completions/mean_terminated_length": 793.0714721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 550.0, "epoch": 0.20906666666666668, "grad_norm": 0.011479136534035206, "learning_rate": 1.1111111111111112e-07, "loss": -0.0029, "num_tokens": 56955751.0, "reward": 1.884519100189209, "reward_std": 0.19984768331050873, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6945406198501587, "rewards/format_reward_step": 1.0, "rewards/stepwise_brier_reward": 0.7185356616973877, "step": 196 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1953125, "calib/ece": 0.39, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 2.220446049250313e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9899999999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39, "calib/std_conf": 0.0, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1401.0, "completions/max_terminated_length": 1401.0, "completions/mean_length": 834.015625, "completions/mean_terminated_length": 847.2540283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 522.0, "epoch": 0.21013333333333334, "grad_norm": 0.008359176106750965, "learning_rate": 8.333333333333334e-08, "loss": 0.0076, "num_tokens": 57274315.0, "reward": 1.7043508291244507, "reward_std": 0.3267871141433716, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6054476499557495, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.6416432857513428, "step": 197 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5066666666666666, "calib/avg_num_step_conf": 6.3125, "calib/ece": 0.28257378472187494, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.99609375, "calib/gap": 0.0013481481493334835, "calib/mean_conf": 0.989605034721875, "calib/mu_c": 0.99, "calib/mu_w": 0.9886518518506665, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.28257378472187494, "calib/std_conf": 0.006307089708075461, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1326.0, "completions/max_terminated_length": 1326.0, "completions/mean_length": 809.23828125, "completions/mean_terminated_length": 822.0833740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 501.0, "epoch": 0.2112, "grad_norm": 0.00892886146903038, "learning_rate": 5.555555555555556e-08, "loss": 0.0056, "num_tokens": 57586864.0, "reward": 1.9245802164077759, "reward_std": 0.28633540868759155, "rewards/accuracy_reward_step": 0.70703125, "rewards/final_brier_reward_step": 0.7056432962417603, "rewards/format_reward_step": 0.98828125, "rewards/stepwise_brier_reward": 0.7739272117614746, "step": 198 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1953125, "calib/ece": 0.2331372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -3.3306690738754696e-16, "calib/mean_conf": 0.99, "calib/mu_c": 0.99, "calib/mu_w": 0.9900000000000003, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2331372549019608, "calib/std_conf": 0.0, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 849.35546875, "completions/mean_terminated_length": 862.8373413085938, "completions/min_length": 0.0, "completions/min_terminated_length": 524.0, "epoch": 0.21226666666666666, "grad_norm": 0.00927034579217434, "learning_rate": 2.777777777777778e-08, "loss": 0.0016, "num_tokens": 57908499.0, "reward": 2.0206198692321777, "reward_std": 0.184719055891037, "rewards/accuracy_reward_step": 0.75390625, "rewards/final_brier_reward_step": 0.7586503624916077, "rewards/format_reward_step": 0.99609375, "rewards/stepwise_brier_reward": 0.8082042336463928, "step": 199 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5, "calib/avg_num_step_conf": 6.1328125, "calib/ece": 0.3025000000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 1.1102230246251565e-16, "calib/mean_conf": 0.9900000000000001, "calib/mu_c": 0.9900000000000001, "calib/mu_w": 0.99, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3025000000000001, "calib/std_conf": 1.1102230246251565e-16, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1509.0, "completions/max_terminated_length": 1509.0, "completions/mean_length": 849.71875, "completions/mean_terminated_length": 863.2064208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 507.0, "epoch": 0.21333333333333335, "grad_norm": 0.008647196926176548, "learning_rate": 0.0, "loss": -0.0046, "num_tokens": 58234075.0, "reward": 1.8795125484466553, "reward_std": 0.23101741075515747, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.6934944987297058, "rewards/format_reward_step": 0.9921875, "rewards/stepwise_brier_reward": 0.7151811122894287, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": -0.012948631015606225, "train_runtime": 11330.6406, "train_samples_per_second": 4.519, "train_steps_per_second": 0.018 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 58234075, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }