{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.004053546581417322, "learning_rate": 2.5000000000000004e-07, "loss": 0.0319, "num_tokens": 264685.0, "reward": 0.045387499034404755, "reward_std": 0.09252828359603882, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006135514006018639, "learning_rate": 5.000000000000001e-07, "loss": 0.0653, "num_tokens": 533467.0, "reward": 0.09099707007408142, "reward_std": 0.17441941797733307, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.8676470588235294, "calib/avg_num_step_conf": 0.296875, "calib/ece": 0.8660526315789473, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.9473684210526315, "calib/gap": 0.026470588235294135, "calib/mean_conf": 0.9713157894736841, "calib/mu_c": 0.995, "calib/mu_w": 0.9685294117647059, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.8660526315789473, "calib/std_conf": 0.024270520631934384, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 690.5546875, "completions/mean_terminated_length": 752.2637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.0056653269566595554, "learning_rate": 7.5e-07, "loss": 0.0359, "num_tokens": 815505.0, "reward": 0.042592138051986694, "reward_std": 0.09321815520524979, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.010965527966618538, "rewards/format_reward_step": 0.05859375, "step": 3 }, { "calib/answer_extract_rate": 0.078125, "calib/avg_num_step_conf": 0.140625, "calib/ece": 0.8799999999999999, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.015625, "calib/frac_conf_gt_0.9": 0.75, "calib/mean_conf": 0.88, "calib/mu_c": NaN, "calib/mu_w": 0.88, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.8799999999999999, "calib/std_conf": 0.20266968199511246, "calib/step_conf_rate": 0.03515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 714.84375, "completions/mean_terminated_length": 778.723388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.0025851440150290728, "learning_rate": 1.0000000000000002e-06, "loss": 0.0194, "num_tokens": 1104673.0, "reward": 0.009871484711766243, "reward_std": 0.02792077511548996, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.004117968957871199, "rewards/format_reward_step": 0.015625, "step": 4 }, { "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.2222222222222222, "calib/avg_num_step_conf": 0.234375, "calib/ece": 0.7763636363636364, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.9090909090909091, "calib/gap": -0.0344444444444445, "calib/mean_conf": 0.9581818181818181, "calib/mu_c": 0.9299999999999999, "calib/mu_w": 0.9644444444444444, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.7763636363636364, "calib/std_conf": 0.026906633794452246, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 659.07421875, "completions/mean_terminated_length": 714.927978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.005333333333333333, "grad_norm": 0.004935156553983688, "learning_rate": 1.25e-06, "loss": 0.0127, "num_tokens": 1380084.0, "reward": 0.026415038853883743, "reward_std": 0.07068032026290894, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.005955078173428774, "rewards/format_reward_step": 0.03125, "step": 5 }, { "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.5343137254901962, "calib/avg_num_step_conf": 0.6796875, "calib/ece": 0.7021739130434783, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.9565217391304348, "calib/gap": 0.002647058823529669, "calib/mean_conf": 0.9630434782608696, "calib/mu_c": 0.9650000000000002, "calib/mu_w": 0.9623529411764705, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.11328125, "calib/pce": 0.7021739130434783, "calib/std_conf": 0.03469554320566958, "calib/step_conf_rate": 0.11328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 605.66015625, "completions/mean_terminated_length": 665.4463500976562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.009108630008995533, "learning_rate": 1.5e-06, "loss": 0.0105, "num_tokens": 1641085.0, "reward": 0.07385234534740448, "reward_std": 0.15033572912216187, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.02270468696951866, "rewards/format_reward_step": 0.078125, "step": 6 }, { "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5735294117647058, "calib/avg_num_step_conf": 0.32421875, "calib/ece": 0.7004761904761903, "calib/final_conf_rate": 0.08203125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": 0.03897058823529409, "calib/mean_conf": 0.8909523809523809, "calib/mu_c": 0.9225, "calib/mu_w": 0.8835294117647059, "calib/nonempty_final_conf_rate": 0.08203125, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.7004761904761903, "calib/std_conf": 0.1668733412446116, "calib/step_conf_rate": 0.078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2924.0, "completions/max_terminated_length": 2924.0, "completions/mean_length": 691.96484375, "completions/mean_terminated_length": 753.7999877929688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.005322422366589308, "learning_rate": 1.75e-06, "loss": 0.0253, "num_tokens": 1925652.0, "reward": 0.04469941556453705, "reward_std": 0.10981818288564682, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.015180077403783798, "rewards/format_reward_step": 0.04296875, "step": 7 }, { "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.5185185185185185, "calib/avg_num_step_conf": 0.30078125, "calib/ece": 0.36933333333333324, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": -0.017777777777777892, "calib/mean_conf": 0.9293333333333335, "calib/mu_c": 0.922222222222222, "calib/mu_w": 0.94, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.3493333333333333, "calib/std_conf": 0.07361763073853679, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 687.71484375, "completions/mean_terminated_length": 736.6317749023438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.008533333333333334, "grad_norm": 0.004791423212736845, "learning_rate": 2.0000000000000003e-06, "loss": 0.0516, "num_tokens": 2208219.0, "reward": 0.08494491875171661, "reward_std": 0.19156719744205475, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.033171091228723526, "rewards/format_reward_step": 0.05078125, "step": 8 }, { "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.07692307692307693, "calib/avg_num_step_conf": 0.40625, "calib/ece": 0.9007142857142856, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.02384615384615374, "calib/mean_conf": 0.972142857142857, "calib/mu_c": 0.95, "calib/mu_w": 0.9738461538461537, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.9007142857142856, "calib/std_conf": 0.01739399380269419, "calib/step_conf_rate": 0.07421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 563.11328125, "completions/mean_terminated_length": 629.5065307617188, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.0096, "grad_norm": 0.004804661963135004, "learning_rate": 2.25e-06, "loss": 0.0143, "num_tokens": 2459912.0, "reward": 0.03250976651906967, "reward_std": 0.07374005764722824, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.006425781175494194, "rewards/format_reward_step": 0.05078125, "step": 9 }, { "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.3999999999999999, "calib/avg_num_step_conf": 0.56640625, "calib/ece": 0.732813043478261, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.6086956521739131, "calib/gap": -0.15443166666666674, "calib/mean_conf": 0.777621739130435, "calib/mu_c": 0.6433333333333333, "calib/mu_w": 0.7977650000000001, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.6900000000000002, "calib/std_conf": 0.32825864663802473, "calib/step_conf_rate": 0.1171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 2877.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 586.82421875, "completions/mean_terminated_length": 656.0131225585938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.0062477379105985165, "learning_rate": 2.5e-06, "loss": 0.0259, "num_tokens": 2716939.0, "reward": 0.062019336968660355, "reward_std": 0.13213272392749786, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.02247617021203041, "rewards/format_reward_step": 0.078125, "step": 10 }, { "calib/answer_extract_rate": 0.1640625, "calib/auroc": 0.35625, "calib/avg_num_step_conf": 0.79296875, "calib/ece": 0.6326470588235293, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.12109375, "calib/frac_conf_gt_0.9": 0.8529411764705882, "calib/gap": 0.034083333333333354, "calib/mean_conf": 0.9179411764705881, "calib/mu_c": 0.942, "calib/mu_w": 0.9079166666666666, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.1953125, "calib/nonempty_step_conf_rate": 0.15625, "calib/pce": 0.628235294117647, "calib/std_conf": 0.1566896410060854, "calib/step_conf_rate": 0.15625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2780.0, "completions/max_terminated_length": 2780.0, "completions/mean_length": 656.1328125, "completions/mean_terminated_length": 708.734130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.005708031821995974, "learning_rate": 2.7500000000000004e-06, "loss": 0.0538, "num_tokens": 2989389.0, "reward": 0.12179140746593475, "reward_std": 0.22675037384033203, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.04436406493186951, "rewards/format_reward_step": 0.12109375, "step": 11 }, { "calib/answer_extract_rate": 0.203125, "calib/auroc": 0.6475095785440612, "calib/avg_num_step_conf": 0.85546875, "calib/ece": 0.6549999999999999, "calib/final_conf_rate": 0.1484375, "calib/format_rate": 0.12890625, "calib/frac_conf_gt_0.9": 0.7631578947368421, "calib/gap": 0.09367816091954018, "calib/mean_conf": 0.8918421052631578, "calib/mu_c": 0.9633333333333334, "calib/mu_w": 0.8696551724137932, "calib/nonempty_final_conf_rate": 0.1484375, "calib/nonempty_reasoning_rate": 0.2421875, "calib/nonempty_step_conf_rate": 0.19140625, "calib/pce": 0.6549999999999999, "calib/std_conf": 0.18988715850206647, "calib/step_conf_rate": 0.19140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 600.0234375, "completions/mean_terminated_length": 656.4359130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.009454714134335518, "learning_rate": 3e-06, "loss": 0.0693, "num_tokens": 3247171.0, "reward": 0.13060665130615234, "reward_std": 0.2470502257347107, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.0541820302605629, "rewards/format_reward_step": 0.12890625, "step": 12 }, { "calib/answer_extract_rate": 0.2578125, "calib/auroc": 0.5850340136054422, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.7215344827586208, "calib/final_conf_rate": 0.2265625, "calib/format_rate": 0.1796875, "calib/frac_conf_gt_0.9": 0.6379310344827587, "calib/gap": 0.07754875283446738, "calib/mean_conf": 0.8767068965517241, "calib/mu_c": 0.9422222222222223, "calib/mu_w": 0.8646734693877549, "calib/nonempty_final_conf_rate": 0.2265625, "calib/nonempty_reasoning_rate": 0.30859375, "calib/nonempty_step_conf_rate": 0.2421875, "calib/pce": 0.7215344827586208, "calib/std_conf": 0.17499911414757954, "calib/step_conf_rate": 0.2421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 648.640625, "completions/mean_terminated_length": 675.0081176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.01001790538430214, "learning_rate": 3.2500000000000002e-06, "loss": 0.1389, "num_tokens": 3517815.0, "reward": 0.16192597150802612, "reward_std": 0.31205716729164124, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.06603945791721344, "rewards/format_reward_step": 0.1796875, "step": 13 }, { "calib/answer_extract_rate": 0.32421875, "calib/auroc": 0.5225490196078432, "calib/avg_num_step_conf": 1.453125, "calib/ece": 0.5990408450704225, "calib/final_conf_rate": 0.27734375, "calib/format_rate": 0.24609375, "calib/frac_conf_gt_0.9": 0.647887323943662, "calib/gap": 0.03117450980392167, "calib/mean_conf": 0.853607042253521, "calib/mu_c": 0.876, "calib/mu_w": 0.8448254901960783, "calib/nonempty_final_conf_rate": 0.27734375, "calib/nonempty_reasoning_rate": 0.3671875, "calib/nonempty_step_conf_rate": 0.3046875, "calib/pce": 0.5854788732394367, "calib/std_conf": 0.23616655755663868, "calib/step_conf_rate": 0.3046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 554.3046875, "completions/mean_terminated_length": 596.2269287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.014933333333333333, "grad_norm": 0.01179414987564087, "learning_rate": 3.5e-06, "loss": 0.0675, "num_tokens": 3765117.0, "reward": 0.2536017894744873, "reward_std": 0.3610790967941284, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.10485979169607162, "rewards/format_reward_step": 0.24609375, "step": 14 }, { "calib/answer_extract_rate": 0.40625, "calib/auroc": 0.47217741935483876, "calib/avg_num_step_conf": 1.87890625, "calib/ece": 0.7005487804878048, "calib/final_conf_rate": 0.3203125, "calib/format_rate": 0.2890625, "calib/frac_conf_gt_0.9": 0.8292682926829268, "calib/gap": -0.06138709677419352, "calib/mean_conf": 0.9159146341463413, "calib/mu_c": 0.8694999999999998, "calib/mu_w": 0.9308870967741933, "calib/nonempty_final_conf_rate": 0.3203125, "calib/nonempty_reasoning_rate": 0.47265625, "calib/nonempty_step_conf_rate": 0.37109375, "calib/pce": 0.6862804878048778, "calib/std_conf": 0.1515431357104759, "calib/step_conf_rate": 0.37109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2756.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 503.78515625, "completions/mean_terminated_length": 517.94775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.016, "grad_norm": 0.011700741946697235, "learning_rate": 3.7500000000000005e-06, "loss": 0.1398, "num_tokens": 4001966.0, "reward": 0.2702401876449585, "reward_std": 0.41204118728637695, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.09516787528991699, "rewards/format_reward_step": 0.2890625, "step": 15 }, { "calib/answer_extract_rate": 0.55078125, "calib/auroc": 0.4697368421052632, "calib/avg_num_step_conf": 2.83203125, "calib/ece": 0.5950789062500003, "calib/final_conf_rate": 0.5, "calib/format_rate": 0.4453125, "calib/frac_conf_gt_0.9": 0.7265625, "calib/gap": 0.03276497076023377, "calib/mean_conf": 0.8911726562500002, "calib/mu_c": 0.9142105263157895, "calib/mu_w": 0.8814455555555557, "calib/nonempty_final_conf_rate": 0.5, "calib/nonempty_reasoning_rate": 0.65234375, "calib/nonempty_step_conf_rate": 0.5625, "calib/pce": 0.5946882812500003, "calib/std_conf": 0.18623632488170597, "calib/step_conf_rate": 0.5625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2944.0, "completions/max_terminated_length": 2944.0, "completions/mean_length": 465.9609375, "completions/mean_terminated_length": 492.9173278808594, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.012210188433527946, "learning_rate": 4.000000000000001e-06, "loss": 0.0795, "num_tokens": 4230100.0, "reward": 0.4675709009170532, "reward_std": 0.5007450580596924, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.19295427203178406, "rewards/format_reward_step": 0.4453125, "step": 16 }, { "calib/answer_extract_rate": 0.59765625, "calib/auroc": 0.5087833441769682, "calib/avg_num_step_conf": 2.65625, "calib/ece": 0.683408888888889, "calib/final_conf_rate": 0.52734375, "calib/format_rate": 0.453125, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.03695647364996746, "calib/mean_conf": 0.8982237037037036, "calib/mu_c": 0.9272413793103447, "calib/mu_w": 0.8902849056603772, "calib/nonempty_final_conf_rate": 0.52734375, "calib/nonempty_reasoning_rate": 0.66796875, "calib/nonempty_step_conf_rate": 0.546875, "calib/pce": 0.683408888888889, "calib/std_conf": 0.17078073627484708, "calib/step_conf_rate": 0.546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 409.4296875, "completions/mean_terminated_length": 415.9285888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.018133333333333335, "grad_norm": 0.012316138483583927, "learning_rate": 4.25e-06, "loss": 0.1109, "num_tokens": 4438442.0, "reward": 0.42024004459381104, "reward_std": 0.49231672286987305, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.16079255938529968, "rewards/format_reward_step": 0.453125, "step": 17 }, { "calib/answer_extract_rate": 0.71484375, "calib/auroc": 0.5049441494231826, "calib/avg_num_step_conf": 3.51953125, "calib/ece": 0.6499999999999999, "calib/final_conf_rate": 0.6640625, "calib/format_rate": 0.59765625, "calib/frac_conf_gt_0.9": 0.7058823529411765, "calib/gap": -0.003611060245376607, "calib/mean_conf": 0.882, "calib/mu_c": 0.8793023255813952, "calib/mu_w": 0.8829133858267718, "calib/nonempty_final_conf_rate": 0.6640625, "calib/nonempty_reasoning_rate": 0.7890625, "calib/nonempty_step_conf_rate": 0.6953125, "calib/pce": 0.6395294117647058, "calib/std_conf": 0.18625283258801414, "calib/step_conf_rate": 0.6953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2075.0, "completions/max_terminated_length": 2075.0, "completions/mean_length": 387.59375, "completions/mean_terminated_length": 395.31475830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.010788912884891033, "learning_rate": 4.5e-06, "loss": 0.091, "num_tokens": 4648386.0, "reward": 0.5889451503753662, "reward_std": 0.5265281796455383, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.23648397624492645, "rewards/format_reward_step": 0.59765625, "step": 18 }, { "calib/answer_extract_rate": 0.890625, "calib/auroc": 0.5793439716312057, "calib/avg_num_step_conf": 4.60546875, "calib/ece": 0.7155063988095238, "calib/final_conf_rate": 0.875, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.65625, "calib/gap": 0.06672523640661943, "calib/mean_conf": 0.8762206845238095, "calib/mu_c": 0.9322222222222223, "calib/mu_w": 0.8654969858156029, "calib/nonempty_final_conf_rate": 0.875, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.7155063988095238, "calib/std_conf": 0.18285521951342068, "calib/step_conf_rate": 0.92578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 322.078125, "completions/mean_terminated_length": 322.078125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.020266666666666665, "grad_norm": 0.010422209277749062, "learning_rate": 4.75e-06, "loss": 0.1375, "num_tokens": 4835598.0, "reward": 0.696550190448761, "reward_std": 0.4772275686264038, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.2798190712928772, "rewards/format_reward_step": 0.81640625, "step": 19 }, { "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.4727515243902438, "calib/avg_num_step_conf": 4.66796875, "calib/ece": 0.6378179824561403, "calib/final_conf_rate": 0.890625, "calib/format_rate": 0.84765625, "calib/frac_conf_gt_0.9": 0.7368421052631579, "calib/gap": -0.024479801829268233, "calib/mean_conf": 0.8956864035087719, "calib/mu_c": 0.878078125, "calib/mu_w": 0.9025579268292683, "calib/nonempty_final_conf_rate": 0.890625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.6264013157894737, "calib/std_conf": 0.16848325998176433, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2049.0, "completions/max_terminated_length": 2049.0, "completions/mean_length": 279.16015625, "completions/mean_terminated_length": 280.2549133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 27.0, "epoch": 0.021333333333333333, "grad_norm": 0.011439248919487, "learning_rate": 5e-06, "loss": 0.0787, "num_tokens": 5011935.0, "reward": 0.8440353870391846, "reward_std": 0.5284815430641174, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.33260199427604675, "rewards/format_reward_step": 0.84765625, "step": 20 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5728703703703703, "calib/avg_num_step_conf": 4.91796875, "calib/ece": 0.6571620833333334, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.7583333333333333, "calib/gap": 0.049561666666666615, "calib/mean_conf": 0.9028287500000001, "calib/mu_c": 0.94, "calib/mu_w": 0.8904383333333333, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.6549954166666667, "calib/std_conf": 0.16495550172224477, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2138.0, "completions/max_terminated_length": 2138.0, "completions/mean_length": 292.421875, "completions/mean_terminated_length": 292.421875, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.0224, "grad_norm": 0.009296974167227745, "learning_rate": 4.9722222222222224e-06, "loss": 0.1052, "num_tokens": 5189755.0, "reward": 0.8685685396194458, "reward_std": 0.5453428030014038, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.3465120196342468, "rewards/format_reward_step": 0.9140625, "step": 21 }, { "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.4936801881246326, "calib/avg_num_step_conf": 4.83984375, "calib/ece": 0.6979629629629629, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.7242798353909465, "calib/gap": 0.008095238095238044, "calib/mean_conf": 0.9201851851851852, "calib/mu_c": 0.9264814814814814, "calib/mu_w": 0.9183862433862433, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6979629629629629, "calib/std_conf": 0.1163349192765126, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2037.0, "completions/max_terminated_length": 2037.0, "completions/mean_length": 258.171875, "completions/mean_terminated_length": 258.171875, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 0.023466666666666667, "grad_norm": 0.00845903530716896, "learning_rate": 4.944444444444445e-06, "loss": 0.0093, "num_tokens": 5357663.0, "reward": 0.8230749368667603, "reward_std": 0.546536922454834, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.3063061237335205, "rewards/format_reward_step": 0.91015625, "step": 22 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.504986125563649, "calib/avg_num_step_conf": 4.4375, "calib/ece": 0.6572983870967741, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.717741935483871, "calib/gap": 0.02962365591397864, "calib/mean_conf": 0.9047177419354839, "calib/mu_c": 0.9269354838709678, "calib/mu_w": 0.8973118279569892, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.6560080645161289, "calib/std_conf": 0.1523363776098416, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2508.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 261.12890625, "completions/mean_terminated_length": 261.12890625, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.024533333333333334, "grad_norm": 0.009214174933731556, "learning_rate": 4.9166666666666665e-06, "loss": 0.0975, "num_tokens": 5528448.0, "reward": 0.890078067779541, "reward_std": 0.47853392362594604, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.3543750047683716, "rewards/format_reward_step": 0.93359375, "step": 23 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5610976065818997, "calib/avg_num_step_conf": 4.90234375, "calib/ece": 0.6875303643724695, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.7489878542510121, "calib/gap": 0.033760284218399605, "calib/mean_conf": 0.9142510121457489, "calib/mu_c": 0.9403571428571429, "calib/mu_w": 0.9065968586387433, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6875303643724695, "calib/std_conf": 0.13924662007901306, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1363.0, "completions/max_terminated_length": 1363.0, "completions/mean_length": 251.2734375, "completions/mean_terminated_length": 251.2734375, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.0256, "grad_norm": 0.008825111202895641, "learning_rate": 4.888888888888889e-06, "loss": 0.0102, "num_tokens": 5697286.0, "reward": 0.8552491664886475, "reward_std": 0.5274129509925842, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.3276859521865845, "rewards/format_reward_step": 0.9453125, "step": 24 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4502214078485265, "calib/avg_num_step_conf": 4.87109375, "calib/ece": 0.623585657370518, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.6932270916334662, "calib/gap": -0.024641930065658957, "calib/mean_conf": 0.9000796812749005, "calib/mu_c": 0.8827027027027026, "calib/mu_w": 0.9073446327683615, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6144223107569722, "calib/std_conf": 0.14409213528743076, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 721.0, "completions/max_terminated_length": 721.0, "completions/mean_length": 237.2734375, "completions/mean_terminated_length": 238.2039337158203, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.02666666666666667, "grad_norm": 0.007968959398567677, "learning_rate": 4.861111111111111e-06, "loss": -0.0052, "num_tokens": 5861252.0, "reward": 0.9608609080314636, "reward_std": 0.5278224945068359, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.382659375667572, "rewards/format_reward_step": 0.9609375, "step": 25 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.51286333273154, "calib/avg_num_step_conf": 5.19140625, "calib/ece": 0.6699598393574299, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.6746987951807228, "calib/gap": 0.006085936089546884, "calib/mean_conf": 0.8942971887550201, "calib/mu_c": 0.8989655172413794, "calib/mu_w": 0.8928795811518325, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6656626506024097, "calib/std_conf": 0.16410279382109272, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2810.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 263.484375, "completions/mean_terminated_length": 263.484375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.027733333333333332, "grad_norm": 0.00924302265048027, "learning_rate": 4.833333333333333e-06, "loss": 0.034, "num_tokens": 6033944.0, "reward": 0.8833884596824646, "reward_std": 0.4853111505508423, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.3488081991672516, "rewards/format_reward_step": 0.96484375, "step": 26 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5088881706528765, "calib/avg_num_step_conf": 4.64453125, "calib/ece": 0.6110404, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": 0.02143771816418849, "calib/mean_conf": 0.8770404, "calib/mu_c": 0.8926470588235293, "calib/mu_w": 0.8712093406593409, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6080404, "calib/std_conf": 0.15886140125228657, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1076.0, "completions/max_terminated_length": 1076.0, "completions/mean_length": 235.78515625, "completions/mean_terminated_length": 236.70982360839844, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.0288, "grad_norm": 0.008842146955430508, "learning_rate": 4.805555555555556e-06, "loss": 0.0214, "num_tokens": 6199521.0, "reward": 0.9590640068054199, "reward_std": 0.4516690969467163, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.406409353017807, "rewards/format_reward_step": 0.97265625, "step": 27 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4355572998430141, "calib/avg_num_step_conf": 4.66796875, "calib/ece": 0.6103968253968255, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.5952380952380952, "calib/gap": -0.002461538461538626, "calib/mean_conf": 0.8863492063492064, "calib/mu_c": 0.8845714285714283, "calib/mu_w": 0.887032967032967, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.609484126984127, "calib/std_conf": 0.13989850728524575, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1916.0, "completions/max_terminated_length": 1916.0, "completions/mean_length": 262.296875, "completions/mean_terminated_length": 263.32550048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.029866666666666666, "grad_norm": 0.007787063717842102, "learning_rate": 4.777777777777778e-06, "loss": 0.0222, "num_tokens": 6373613.0, "reward": 0.9608750343322754, "reward_std": 0.49356719851493835, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.3944062292575836, "rewards/format_reward_step": 0.97265625, "step": 28 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4803363628641808, "calib/avg_num_step_conf": 5.140625, "calib/ece": 0.6088582677165355, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.4094488188976378, "calib/gap": 0.012082731674169622, "calib/mean_conf": 0.849015748031496, "calib/mu_c": 0.8581967213114754, "calib/mu_w": 0.8461139896373058, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6088582677165355, "calib/std_conf": 0.14378535425576625, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1972.0, "completions/max_terminated_length": 1972.0, "completions/mean_length": 284.26953125, "completions/mean_terminated_length": 284.26953125, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.030933333333333334, "grad_norm": 0.007615217939019203, "learning_rate": 4.75e-06, "loss": 0.0375, "num_tokens": 6553514.0, "reward": 0.9362058639526367, "reward_std": 0.4864776134490967, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.41928672790527344, "rewards/format_reward_step": 0.9765625, "step": 29 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49706959706959714, "calib/avg_num_step_conf": 4.69140625, "calib/ece": 0.5245275590551182, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2637795275590551, "calib/gap": 0.01864631664631644, "calib/mean_conf": 0.7804330708661418, "calib/mu_c": 0.7943076923076923, "calib/mu_w": 0.7756613756613758, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5245275590551182, "calib/std_conf": 0.18300695269057257, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 689.0, "completions/max_terminated_length": 689.0, "completions/mean_length": 253.421875, "completions/mean_terminated_length": 254.4156951904297, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.032, "grad_norm": 0.009619243443012238, "learning_rate": 4.722222222222222e-06, "loss": -0.0326, "num_tokens": 6725374.0, "reward": 0.9973111152648926, "reward_std": 0.4718785881996155, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.5024347305297852, "rewards/format_reward_step": 0.9765625, "step": 30 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5776629704301075, "calib/avg_num_step_conf": 4.68359375, "calib/ece": 0.4586614173228346, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1377952755905512, "calib/gap": 0.06506048387096774, "calib/mean_conf": 0.702755905511811, "calib/mu_c": 0.7519354838709679, "calib/mu_w": 0.6868750000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4586614173228346, "calib/std_conf": 0.2185931292742577, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 255.47265625, "completions/mean_terminated_length": 256.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.03306666666666667, "grad_norm": 0.008545058779418468, "learning_rate": 4.694444444444445e-06, "loss": 0.0224, "num_tokens": 6896687.0, "reward": 1.0230578184127808, "reward_std": 0.3965718150138855, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.5734593868255615, "rewards/format_reward_step": 0.98828125, "step": 31 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5439977621483376, "calib/avg_num_step_conf": 4.171875, "calib/ece": 0.3516666666666667, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": 0.03894820971867008, "calib/mean_conf": 0.5952380952380952, "calib/mu_c": 0.6236764705882353, "calib/mu_w": 0.5847282608695652, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3385317460317461, "calib/std_conf": 0.22601522032507923, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2092.0, "completions/max_terminated_length": 2092.0, "completions/mean_length": 264.09375, "completions/mean_terminated_length": 264.09375, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.034133333333333335, "grad_norm": 0.009222334250807762, "learning_rate": 4.666666666666667e-06, "loss": 0.0659, "num_tokens": 7070999.0, "reward": 1.0783140659332275, "reward_std": 0.35461053252220154, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.6488156318664551, "rewards/format_reward_step": 0.9765625, "step": 32 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5759622175141242, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.3166135458167331, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.06111670197740127, "calib/mean_conf": 0.5508764940239043, "calib/mu_c": 0.5976271186440679, "calib/mu_w": 0.5365104166666667, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3162151394422312, "calib/std_conf": 0.22650652938457116, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2500.0, "completions/max_terminated_length": 2500.0, "completions/mean_length": 265.62109375, "completions/mean_terminated_length": 266.6627502441406, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.0352, "grad_norm": 0.007931312546133995, "learning_rate": 4.638888888888889e-06, "loss": 0.0692, "num_tokens": 7245870.0, "reward": 1.0570218563079834, "reward_std": 0.3615739941596985, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.6765437722206116, "rewards/format_reward_step": 0.9765625, "step": 33 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5165995975855131, "calib/avg_num_step_conf": 3.6171875, "calib/ece": 0.20731225296442687, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.008900325027085654, "calib/mean_conf": 0.4113438735177865, "calib/mu_c": 0.41774647887323946, "calib/mu_w": 0.4088461538461538, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1690118577075099, "calib/std_conf": 0.21953727013038177, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1995.0, "completions/max_terminated_length": 1995.0, "completions/mean_length": 207.265625, "completions/mean_terminated_length": 208.0784454345703, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.03626666666666667, "grad_norm": 0.012170094065368176, "learning_rate": 4.611111111111112e-06, "loss": 0.0295, "num_tokens": 7404042.0, "reward": 1.1278128623962402, "reward_std": 0.3320312201976776, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7204695343971252, "rewards/format_reward_step": 0.98046875, "step": 34 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5160454621427377, "calib/avg_num_step_conf": 4.52734375, "calib/ece": 0.15588235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.0016839378238341918, "calib/mean_conf": 0.3437254901960784, "calib/mu_c": 0.34500000000000003, "calib/mu_w": 0.34331606217616584, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12823529411764706, "calib/std_conf": 0.19647641962306053, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 252.0, "completions/mean_terminated_length": 252.98825073242188, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.037333333333333336, "grad_norm": 0.009190870448946953, "learning_rate": 4.583333333333333e-06, "loss": 0.0373, "num_tokens": 7577810.0, "reward": 1.1189427375793457, "reward_std": 0.3060336709022522, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7613230347633362, "rewards/format_reward_step": 0.9921875, "step": 35 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5140435222672064, "calib/avg_num_step_conf": 4.34765625, "calib/ece": 0.193125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007449392712550573, "calib/mean_conf": 0.28250000000000003, "calib/mu_c": 0.2869230769230769, "calib/mu_w": 0.2794736842105263, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.034687499999999996, "calib/std_conf": 0.17630761469658648, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 245.37890625, "completions/mean_terminated_length": 246.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.0384, "grad_norm": 0.009832658804953098, "learning_rate": 4.555555555555556e-06, "loss": -0.0565, "num_tokens": 7743339.0, "reward": 1.2605117559432983, "reward_std": 0.3142458498477936, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.7124297022819519, "rewards/format_reward_step": 0.99609375, "step": 36 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6023396596858639, "calib/avg_num_step_conf": 4.08984375, "calib/ece": 0.12815686274509808, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04336142015706804, "calib/mean_conf": 0.18549019607843137, "calib/mu_c": 0.21796875, "calib/mu_w": 0.17460732984293195, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03133333333333335, "calib/std_conf": 0.15180113055315647, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2324.0, "completions/max_terminated_length": 2324.0, "completions/mean_length": 233.12890625, "completions/mean_terminated_length": 233.12890625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.039466666666666664, "grad_norm": 0.010432302951812744, "learning_rate": 4.527777777777778e-06, "loss": -0.0212, "num_tokens": 7910116.0, "reward": 1.1431448459625244, "reward_std": 0.22751232981681824, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7941023707389832, "rewards/format_reward_step": 0.9921875, "step": 37 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5711820231051001, "calib/avg_num_step_conf": 4.4921875, "calib/ece": 0.20245059288537554, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020861510284587215, "calib/mean_conf": 0.1416600790513834, "calib/mu_c": 0.1555952380952381, "calib/mu_w": 0.13473372781065088, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006047430830039525, "calib/std_conf": 0.12258582162401513, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1343.0, "completions/max_terminated_length": 1343.0, "completions/mean_length": 250.33984375, "completions/mean_terminated_length": 251.3215789794922, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.04053333333333333, "grad_norm": 0.010481895878911018, "learning_rate": 4.5e-06, "loss": 0.0221, "num_tokens": 8081091.0, "reward": 1.1860566139221191, "reward_std": 0.24294333159923553, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.7275820374488831, "rewards/format_reward_step": 0.98828125, "step": 38 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5060984624482554, "calib/avg_num_step_conf": 4.95703125, "calib/ece": 0.22846456692913386, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010470136014192832, "calib/mean_conf": 0.12681102362204724, "calib/mu_c": 0.11947368421052629, "calib/mu_w": 0.12994382022471912, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02803149606299212, "calib/std_conf": 0.12266587562901954, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2127.0, "completions/max_terminated_length": 2127.0, "completions/mean_length": 297.2890625, "completions/mean_terminated_length": 297.2890625, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.0416, "grad_norm": 0.008426911197602749, "learning_rate": 4.472222222222223e-06, "loss": 0.0851, "num_tokens": 8263285.0, "reward": 1.1606513261795044, "reward_std": 0.21219465136528015, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7353652119636536, "rewards/format_reward_step": 0.9921875, "step": 39 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5223880597014925, "calib/avg_num_step_conf": 5.953125, "calib/ece": 0.16732421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008548132354102483, "calib/mean_conf": 0.10697265624999999, "calib/mu_c": 0.11328358208955223, "calib/mu_w": 0.10473544973544975, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006289062499999998, "calib/std_conf": 0.09797318687308959, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1023.0, "completions/max_terminated_length": 1023.0, "completions/mean_length": 325.19921875, "completions/mean_terminated_length": 326.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.042666666666666665, "grad_norm": 0.007824345491826534, "learning_rate": 4.444444444444444e-06, "loss": -0.0146, "num_tokens": 8453296.0, "reward": 1.144127368927002, "reward_std": 0.20972102880477905, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.772629976272583, "rewards/format_reward_step": 0.9921875, "step": 40 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6262403870007442, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.4732745098039216, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025394753162986866, "calib/mean_conf": 0.08319607843137257, "calib/mu_c": 0.09474820143884893, "calib/mu_w": 0.06935344827586207, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005686274509803921, "calib/std_conf": 0.09673331433517804, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2434.0, "completions/max_terminated_length": 2434.0, "completions/mean_length": 321.4765625, "completions/mean_terminated_length": 321.4765625, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.04373333333333333, "grad_norm": 0.008975100703537464, "learning_rate": 4.416666666666667e-06, "loss": 0.0198, "num_tokens": 8642842.0, "reward": 1.3000850677490234, "reward_std": 0.22270143032073975, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5298577547073364, "rewards/format_reward_step": 0.984375, "step": 41 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.58271484375, "calib/avg_num_step_conf": 5.83984375, "calib/ece": 0.33187500000000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009666666666666664, "calib/mean_conf": 0.043125, "calib/mu_c": 0.049166666666666664, "calib/mu_w": 0.0395, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05222771654016668, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 736.0, "completions/max_terminated_length": 736.0, "completions/mean_length": 289.89453125, "completions/mean_terminated_length": 291.0314025878906, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.0448, "grad_norm": 0.008974535390734673, "learning_rate": 4.388888888888889e-06, "loss": 0.0092, "num_tokens": 8821423.0, "reward": 1.195831298828125, "reward_std": 0.17560523748397827, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6494749784469604, "rewards/format_reward_step": 0.9921875, "step": 42 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5491141219385096, "calib/avg_num_step_conf": 6.79296875, "calib/ece": 0.3434802371541501, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0145904442417926, "calib/mean_conf": 0.055729249011857704, "calib/mu_c": 0.0644950495049505, "calib/mu_w": 0.049904605263157896, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0754921063213057, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 380.34765625, "completions/mean_terminated_length": 381.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.04586666666666667, "grad_norm": 0.00716799171641469, "learning_rate": 4.361111111111112e-06, "loss": 0.0194, "num_tokens": 9024016.0, "reward": 1.2066413164138794, "reward_std": 0.23286837339401245, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6359390020370483, "rewards/format_reward_step": 0.98828125, "step": 43 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5405405405405406, "calib/avg_num_step_conf": 8.015625, "calib/ece": 0.2551394422310757, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007262940907008693, "calib/mean_conf": 0.042310756972111556, "calib/mu_c": 0.04743243243243243, "calib/mu_w": 0.040169491525423734, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0013147410358565737, "calib/std_conf": 0.05426288970400109, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2002.0, "completions/max_terminated_length": 2002.0, "completions/mean_length": 422.07421875, "completions/mean_terminated_length": 423.72943115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.046933333333333334, "grad_norm": 0.0074686165899038315, "learning_rate": 4.333333333333334e-06, "loss": 0.0368, "num_tokens": 9238387.0, "reward": 1.1363898515701294, "reward_std": 0.18531301617622375, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7141859531402588, "rewards/format_reward_step": 0.98046875, "step": 44 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.541655585106383, "calib/avg_num_step_conf": 7.67578125, "calib/ece": 0.34565748031496063, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020385638297872356, "calib/mean_conf": 0.02906692913385827, "calib/mu_c": 0.030351063829787233, "calib/mu_w": 0.028312499999999997, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0023228346456692917, "calib/std_conf": 0.045259873512994944, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2238.0, "completions/max_terminated_length": 2238.0, "completions/mean_length": 412.1015625, "completions/mean_terminated_length": 412.1015625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.048, "grad_norm": 0.006873466074466705, "learning_rate": 4.305555555555556e-06, "loss": 0.0854, "num_tokens": 9448933.0, "reward": 1.185490369796753, "reward_std": 0.1912062168121338, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6444183588027954, "rewards/format_reward_step": 0.9921875, "step": 45 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5584940677404387, "calib/avg_num_step_conf": 9.4140625, "calib/ece": 0.39927419354838706, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006066149665274752, "calib/mean_conf": 0.039354838709677424, "calib/mu_c": 0.042803738317757016, "calib/mu_w": 0.036737588652482264, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003588709677419355, "calib/std_conf": 0.07931909764224779, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2476.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 481.7890625, "completions/mean_terminated_length": 481.7890625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.04906666666666667, "grad_norm": 0.0063409628346562386, "learning_rate": 4.277777777777778e-06, "loss": 0.0921, "num_tokens": 9677039.0, "reward": 1.1879210472106934, "reward_std": 0.23919141292572021, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5750609040260315, "rewards/format_reward_step": 0.96484375, "step": 46 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4665651483050848, "calib/avg_num_step_conf": 9.8359375, "calib/ece": 0.4530894308943089, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008414989406779663, "calib/mean_conf": 0.036666666666666674, "calib/mu_c": 0.032288135593220336, "calib/mu_w": 0.040703125, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0050406504065040655, "calib/std_conf": 0.05992768903669766, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2549.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 495.2421875, "completions/mean_terminated_length": 499.1417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.050133333333333335, "grad_norm": 0.0063353064469993114, "learning_rate": 4.25e-06, "loss": 0.1315, "num_tokens": 9909797.0, "reward": 1.2039176225662231, "reward_std": 0.23223577439785004, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5250226259231567, "rewards/format_reward_step": 0.9609375, "step": 47 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5106014412416852, "calib/avg_num_step_conf": 8.06640625, "calib/ece": 0.32361111111111107, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.011482815964523278, "calib/mean_conf": 0.025595238095238095, "calib/mu_c": 0.033068181818181816, "calib/mu_w": 0.02158536585365854, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.06970489707141789, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2494.0, "completions/max_terminated_length": 2494.0, "completions/mean_length": 440.2578125, "completions/mean_terminated_length": 441.9843444824219, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.0512, "grad_norm": 0.0075785936787724495, "learning_rate": 4.222222222222223e-06, "loss": 0.0148, "num_tokens": 10126191.0, "reward": 1.1649034023284912, "reward_std": 0.21381776034832, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6579316854476929, "rewards/format_reward_step": 0.984375, "step": 48 }, { "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.5366172316384181, "calib/avg_num_step_conf": 9.73046875, "calib/ece": 0.481218487394958, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00023022598870057337, "calib/mean_conf": 0.02130252100840336, "calib/mu_c": 0.021186440677966097, "calib/mu_w": 0.02141666666666667, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003361344537815126, "calib/std_conf": 0.038897420497709216, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 548.5546875, "completions/mean_terminated_length": 550.7059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.05226666666666667, "grad_norm": 0.0068258256651461124, "learning_rate": 4.194444444444445e-06, "loss": 0.0895, "num_tokens": 10371157.0, "reward": 1.165101408958435, "reward_std": 0.23141947388648987, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.48254647850990295, "rewards/format_reward_step": 0.92578125, "step": 49 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5855808486947915, "calib/avg_num_step_conf": 10.58203125, "calib/ece": 0.4436078431372548, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.00983607571446244, "calib/mean_conf": 0.030901960784313728, "calib/mu_c": 0.03618644067796609, "calib/mu_w": 0.026350364963503653, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0058823529411764705, "calib/std_conf": 0.0949776557627874, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 531.93359375, "completions/mean_terminated_length": 534.0196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.05333333333333334, "grad_norm": 0.005866543855518103, "learning_rate": 4.166666666666667e-06, "loss": 0.0094, "num_tokens": 10612692.0, "reward": 1.2332239151000977, "reward_std": 0.22173798084259033, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.556291401386261, "rewards/format_reward_step": 0.98828125, "step": 50 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5589319771137954, "calib/avg_num_step_conf": 10.3203125, "calib/ece": 0.5042629482071712, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.007488239033693576, "calib/mean_conf": 0.029840637450199207, "calib/mu_c": 0.02623076923076923, "calib/mu_w": 0.03371900826446281, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00808764940239044, "calib/std_conf": 0.07733081070862133, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2674.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 509.7265625, "completions/mean_terminated_length": 511.72552490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.0544, "grad_norm": 0.006805317010730505, "learning_rate": 4.138888888888889e-06, "loss": 0.0505, "num_tokens": 10852478.0, "reward": 1.244327187538147, "reward_std": 0.18637487292289734, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.4925605356693268, "rewards/format_reward_step": 0.98046875, "step": 51 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5733447488584474, "calib/avg_num_step_conf": 8.82421875, "calib/ece": 0.5457480314960629, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012048452562151202, "calib/mean_conf": 0.029055118110236217, "calib/mu_c": 0.034178082191780826, "calib/mu_w": 0.022129629629629624, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04338861331206069, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 467.54296875, "completions/mean_terminated_length": 469.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.055466666666666664, "grad_norm": 0.007461534813046455, "learning_rate": 4.111111111111111e-06, "loss": 0.0133, "num_tokens": 11080121.0, "reward": 1.2954832315444946, "reward_std": 0.18780872225761414, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.4581538736820221, "rewards/format_reward_step": 0.9921875, "step": 52 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6381730769230769, "calib/avg_num_step_conf": 11.16796875, "calib/ece": 0.47348, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024557692307692315, "calib/mean_conf": 0.046520000000000006, "calib/mu_c": 0.05830769230769231, "calib/mu_w": 0.033749999999999995, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.050709857029970024, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 561.5625, "completions/mean_terminated_length": 565.9842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.05653333333333333, "grad_norm": 0.00657409243285656, "learning_rate": 4.083333333333334e-06, "loss": -0.0219, "num_tokens": 11329705.0, "reward": 1.2578158378601074, "reward_std": 0.2188633382320404, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5195378661155701, "rewards/format_reward_step": 0.97265625, "step": 53 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5264729299363057, "calib/avg_num_step_conf": 9.6796875, "calib/ece": 0.5624901185770751, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006783439490445867, "calib/mean_conf": 0.07079051383399211, "calib/mu_c": 0.06821656050955414, "calib/mu_w": 0.07500000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006363636363636363, "calib/std_conf": 0.07321639099281427, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2682.0, "completions/max_terminated_length": 2682.0, "completions/mean_length": 505.84375, "completions/mean_terminated_length": 505.84375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.0576, "grad_norm": 0.006965023465454578, "learning_rate": 4.055555555555556e-06, "loss": 0.0331, "num_tokens": 11565433.0, "reward": 1.3283171653747559, "reward_std": 0.18125469982624054, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.4456968903541565, "rewards/format_reward_step": 0.984375, "step": 54 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6126884095634095, "calib/avg_num_step_conf": 10.7109375, "calib/ece": 0.34039682539682536, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.011431912681912668, "calib/mean_conf": 0.07992063492063492, "calib/mu_c": 0.08663461538461538, "calib/mu_w": 0.07520270270270271, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003809523809523809, "calib/std_conf": 0.06857964934555134, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 528.74609375, "completions/mean_terminated_length": 532.909423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.058666666666666666, "grad_norm": 0.006629865616559982, "learning_rate": 4.027777777777779e-06, "loss": 0.047, "num_tokens": 11808616.0, "reward": 1.2133586406707764, "reward_std": 0.22967413067817688, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.6337484121322632, "rewards/format_reward_step": 0.98046875, "step": 55 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49834107498341074, "calib/avg_num_step_conf": 12.3671875, "calib/ece": 0.3597165991902834, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0001307232913072276, "calib/mean_conf": 0.0908906882591093, "calib/mu_c": 0.09081818181818183, "calib/mu_w": 0.09094890510948905, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0026315789473684206, "calib/std_conf": 0.042110518202119, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 607.5078125, "completions/mean_terminated_length": 612.2913208007812, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.05973333333333333, "grad_norm": 0.006633771117776632, "learning_rate": 4.000000000000001e-06, "loss": 0.0777, "num_tokens": 12070978.0, "reward": 1.2138700485229492, "reward_std": 0.2590479254722595, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.603521466255188, "rewards/format_reward_step": 0.96484375, "step": 56 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5082633053221288, "calib/avg_num_step_conf": 13.42578125, "calib/ece": 0.4905113636363636, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.004132231404958678, "calib/gap": -0.00479709383753503, "calib/mean_conf": 0.09585227272727274, "calib/mu_c": 0.09383035714285715, "calib/mu_w": 0.09862745098039218, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003925619834710744, "calib/std_conf": 0.06541273226486852, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 656.30859375, "completions/mean_terminated_length": 661.4763793945312, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.0608, "grad_norm": 0.006253418512642384, "learning_rate": 3.972222222222223e-06, "loss": 0.1262, "num_tokens": 12345785.0, "reward": 1.2676377296447754, "reward_std": 0.2519448399543762, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.4844941794872284, "rewards/format_reward_step": 0.94140625, "step": 57 }, { "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.6332846003898636, "calib/avg_num_step_conf": 14.1796875, "calib/ece": 0.3548340244813277, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.004149377593360996, "calib/gap": 0.026280283291562248, "calib/mean_conf": 0.09910788423236516, "calib/mu_c": 0.11361111111111112, "calib/mu_w": 0.08733082781954887, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002904564315352697, "calib/std_conf": 0.07863548954889332, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2539.0, "completions/max_terminated_length": 2539.0, "completions/mean_length": 686.38671875, "completions/mean_terminated_length": 694.5256958007812, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.06186666666666667, "grad_norm": 0.0055255042389035225, "learning_rate": 3.944444444444445e-06, "loss": 0.0364, "num_tokens": 12627820.0, "reward": 1.1890089511871338, "reward_std": 0.30035144090652466, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5967679023742676, "rewards/format_reward_step": 0.9375, "step": 58 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.537975912975913, "calib/avg_num_step_conf": 13.39453125, "calib/ece": 0.4355020080321285, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013247863247863034, "calib/mean_conf": 0.10104417670682732, "calib/mu_c": 0.10166666666666667, "calib/mu_w": 0.10034188034188037, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0032128514056224897, "calib/std_conf": 0.05535313844732542, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2864.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 641.71484375, "completions/mean_terminated_length": 646.7677001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.06293333333333333, "grad_norm": 0.006042384542524815, "learning_rate": 3.916666666666667e-06, "loss": 0.0958, "num_tokens": 12898347.0, "reward": 1.266005516052246, "reward_std": 0.27409636974334717, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5398234128952026, "rewards/format_reward_step": 0.9609375, "step": 59 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5423945176027949, "calib/avg_num_step_conf": 13.0, "calib/ece": 0.4001639344262295, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00737704918032786, "calib/mean_conf": 0.10147540983606558, "calib/mu_c": 0.10516393442622951, "calib/mu_w": 0.09778688524590165, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008196721311475411, "calib/std_conf": 0.03207757326855084, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 656.50390625, "completions/mean_terminated_length": 659.0784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.064, "grad_norm": 0.005965266842395067, "learning_rate": 3.88888888888889e-06, "loss": 0.1274, "num_tokens": 13175268.0, "reward": 1.2400319576263428, "reward_std": 0.25855231285095215, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5660015344619751, "rewards/format_reward_step": 0.953125, "step": 60 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5484985384002126, "calib/avg_num_step_conf": 11.09375, "calib/ece": 0.46016129032258063, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005378687217645464, "calib/mean_conf": 0.1124193548387097, "calib/mu_c": 0.11471830985915492, "calib/mu_w": 0.10933962264150945, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0371759323752729, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2766.0, "completions/max_terminated_length": 2766.0, "completions/mean_length": 561.73046875, "completions/mean_terminated_length": 563.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.06506666666666666, "grad_norm": 0.006542930845171213, "learning_rate": 3.861111111111112e-06, "loss": 0.0477, "num_tokens": 13423135.0, "reward": 1.2990654706954956, "reward_std": 0.21576321125030518, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5200058817863464, "rewards/format_reward_step": 0.9609375, "step": 61 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5646021328958163, "calib/avg_num_step_conf": 13.12890625, "calib/ece": 0.3220491803278688, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01139458572600495, "calib/mean_conf": 0.11581967213114756, "calib/mu_c": 0.12226415094339625, "calib/mu_w": 0.1108695652173913, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0017213114754098362, "calib/std_conf": 0.053144408338012566, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3036.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 665.9609375, "completions/mean_terminated_length": 671.2047119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.06613333333333334, "grad_norm": 0.005913985893130302, "learning_rate": 3.833333333333334e-06, "loss": 0.094, "num_tokens": 13700701.0, "reward": 1.1991487741470337, "reward_std": 0.30506694316864014, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6209539175033569, "rewards/format_reward_step": 0.94921875, "step": 62 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.576269422863485, "calib/avg_num_step_conf": 13.2421875, "calib/ece": 0.4343801652892562, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011179245283018863, "calib/mean_conf": 0.12760330578512397, "calib/mu_c": 0.1325, "calib/mu_w": 0.12132075471698114, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.047273160721586785, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2743.0, "completions/max_terminated_length": 2743.0, "completions/mean_length": 644.63671875, "completions/mean_terminated_length": 657.4780883789062, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.0672, "grad_norm": 0.005883402191102505, "learning_rate": 3.8055555555555556e-06, "loss": 0.0142, "num_tokens": 13974368.0, "reward": 1.2702515125274658, "reward_std": 0.32960256934165955, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5365968942642212, "rewards/format_reward_step": 0.94140625, "step": 63 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.524519833674345, "calib/avg_num_step_conf": 12.22265625, "calib/ece": 0.43112096774193537, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00032618309022508507, "calib/mean_conf": 0.1333951612903226, "calib/mu_c": 0.13325179856115107, "calib/mu_w": 0.13357798165137616, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0020161290322580645, "calib/std_conf": 0.048966136842026345, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2339.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 603.81640625, "completions/mean_terminated_length": 608.5708618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.06826666666666667, "grad_norm": 0.006580499932169914, "learning_rate": 3.777777777777778e-06, "loss": 0.0234, "num_tokens": 14232721.0, "reward": 1.3028054237365723, "reward_std": 0.22784338891506195, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5509234666824341, "rewards/format_reward_step": 0.96875, "step": 64 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5524828549657099, "calib/avg_num_step_conf": 10.08984375, "calib/ece": 0.36749003984063744, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009918084836169683, "calib/mean_conf": 0.13848605577689244, "calib/mu_c": 0.14338582677165354, "calib/mu_w": 0.13346774193548386, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.04704091214167084, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 512.69140625, "completions/mean_terminated_length": 516.7283325195312, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.06933333333333333, "grad_norm": 0.007941358722746372, "learning_rate": 3.7500000000000005e-06, "loss": 0.0231, "num_tokens": 14468994.0, "reward": 1.2930680513381958, "reward_std": 0.17943525314331055, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6056671738624573, "rewards/format_reward_step": 0.98046875, "step": 65 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.6290690220101147, "calib/avg_num_step_conf": 13.484375, "calib/ece": 0.2954583333333334, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015108625970510714, "calib/mean_conf": 0.13204166666666667, "calib/mu_c": 0.14079207920792078, "calib/mu_w": 0.12568345323741006, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0033333333333333335, "calib/std_conf": 0.063039325799236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2800.0, "completions/max_terminated_length": 2800.0, "completions/mean_length": 675.0078125, "completions/mean_terminated_length": 683.0119018554688, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.0704, "grad_norm": 0.006969817914068699, "learning_rate": 3.7222222222222225e-06, "loss": 0.0674, "num_tokens": 14748148.0, "reward": 1.1725585460662842, "reward_std": 0.265777587890625, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.6263669729232788, "rewards/format_reward_step": 0.9296875, "step": 66 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6252823048281755, "calib/avg_num_step_conf": 11.24609375, "calib/ece": 0.3509374999999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024613318684001656, "calib/mean_conf": 0.14515625, "calib/mu_c": 0.1575590551181102, "calib/mu_w": 0.13294573643410854, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.05509203060277866, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1709.0, "completions/max_terminated_length": 1709.0, "completions/mean_length": 601.875, "completions/mean_terminated_length": 604.2353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.07146666666666666, "grad_norm": 0.006899421103298664, "learning_rate": 3.694444444444445e-06, "loss": -0.0108, "num_tokens": 15007236.0, "reward": 1.3141582012176514, "reward_std": 0.14711973071098328, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6361289024353027, "rewards/format_reward_step": 1.0, "step": 67 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6069020356234096, "calib/avg_num_step_conf": 10.9296875, "calib/ece": 0.32878486055776895, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023849236641221405, "calib/mean_conf": 0.1493027888446215, "calib/mu_c": 0.16175000000000003, "calib/mu_w": 0.13790076335877863, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.050258415403945766, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2691.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 571.80859375, "completions/mean_terminated_length": 571.80859375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.07253333333333334, "grad_norm": 0.007070120424032211, "learning_rate": 3.6666666666666666e-06, "loss": 0.0571, "num_tokens": 15257707.0, "reward": 1.274619698524475, "reward_std": 0.17965587973594666, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6351768374443054, "rewards/format_reward_step": 0.9765625, "step": 68 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48980032047331445, "calib/avg_num_step_conf": 10.9765625, "calib/ece": 0.3248235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004690003697769285, "calib/mean_conf": 0.1536078431372549, "calib/mu_c": 0.15385245901639347, "calib/mu_w": 0.15338345864661654, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.057846340888352415, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2086.0, "completions/max_terminated_length": 2086.0, "completions/mean_length": 596.078125, "completions/mean_terminated_length": 598.4157104492188, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.0736, "grad_norm": 0.007091444917023182, "learning_rate": 3.638888888888889e-06, "loss": 0.0157, "num_tokens": 15514799.0, "reward": 1.2942771911621094, "reward_std": 0.23002542555332184, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6393355131149292, "rewards/format_reward_step": 0.99609375, "step": 69 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.7273175542406312, "calib/avg_num_step_conf": 10.15625, "calib/ece": 0.3274898785425101, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.012145748987854251, "calib/gap": 0.029769230769230798, "calib/mean_conf": 0.17048582995951417, "calib/mu_c": 0.18615384615384617, "calib/mu_w": 0.15638461538461537, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012145748987854251, "calib/std_conf": 0.10982764746359674, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2881.0, "completions/max_terminated_length": 2881.0, "completions/mean_length": 601.23046875, "completions/mean_terminated_length": 603.5882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.07466666666666667, "grad_norm": 0.006808578036725521, "learning_rate": 3.6111111111111115e-06, "loss": 0.0656, "num_tokens": 15775706.0, "reward": 1.258596658706665, "reward_std": 0.22801057994365692, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6382870674133301, "rewards/format_reward_step": 0.96484375, "step": 70 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6433621366849961, "calib/avg_num_step_conf": 10.703125, "calib/ece": 0.280875, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0325803875360042, "calib/mean_conf": 0.18283467741935486, "calib/mu_c": 0.20043859649122808, "calib/mu_w": 0.16785820895522388, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0020161290322580645, "calib/std_conf": 0.07578663336945762, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2464.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 605.359375, "completions/mean_terminated_length": 614.96826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.07573333333333333, "grad_norm": 0.006682004313915968, "learning_rate": 3.5833333333333335e-06, "loss": 0.0171, "num_tokens": 16035086.0, "reward": 1.2577886581420898, "reward_std": 0.2718082070350647, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6601086258888245, "rewards/format_reward_step": 0.96484375, "step": 71 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6123209540475727, "calib/avg_num_step_conf": 11.37890625, "calib/ece": 0.2582, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02505347073692399, "calib/mean_conf": 0.18580000000000002, "calib/mu_c": 0.19972972972972972, "calib/mu_w": 0.17467625899280573, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06568074299214344, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2572.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 578.66015625, "completions/mean_terminated_length": 578.66015625, "completions/min_length": 219.0, "completions/min_terminated_length": 219.0, "epoch": 0.0768, "grad_norm": 0.007656267378479242, "learning_rate": 3.555555555555556e-06, "loss": 0.0642, "num_tokens": 16287631.0, "reward": 1.260998249053955, "reward_std": 0.20352822542190552, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6782464981079102, "rewards/format_reward_step": 0.9765625, "step": 72 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6154974659909309, "calib/avg_num_step_conf": 10.42578125, "calib/ece": 0.44007843137254904, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025857562016537772, "calib/mean_conf": 0.1991372549019608, "calib/mu_c": 0.20846625766871169, "calib/mu_w": 0.1826086956521739, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06644502950112682, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2272.0, "completions/max_terminated_length": 2272.0, "completions/mean_length": 562.6796875, "completions/mean_terminated_length": 562.6796875, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.07786666666666667, "grad_norm": 0.00714003574103117, "learning_rate": 3.5277777777777784e-06, "loss": 0.024, "num_tokens": 16538709.0, "reward": 1.4214353561401367, "reward_std": 0.19357150793075562, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.5772457122802734, "rewards/format_reward_step": 0.9921875, "step": 73 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48861584107327144, "calib/avg_num_step_conf": 11.1015625, "calib/ece": 0.34632000000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007172342621258876, "calib/mean_conf": 0.19768, "calib/mu_c": 0.1973529411764706, "calib/mu_w": 0.19807017543859648, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0, "calib/std_conf": 0.06415152063669263, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2306.0, "completions/max_terminated_length": 2306.0, "completions/mean_length": 582.40234375, "completions/mean_terminated_length": 584.686279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.07893333333333333, "grad_norm": 0.007149734999984503, "learning_rate": 3.5e-06, "loss": 0.0427, "num_tokens": 16791732.0, "reward": 1.3170249462127686, "reward_std": 0.25684505701065063, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6067058444023132, "rewards/format_reward_step": 0.96484375, "step": 74 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5926470588235293, "calib/avg_num_step_conf": 10.203125, "calib/ece": 0.43873015873015864, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.023167862266857964, "calib/mean_conf": 0.23587301587301587, "calib/mu_c": 0.24341176470588233, "calib/mu_w": 0.22024390243902436, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07881332013427077, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 529.44140625, "completions/mean_terminated_length": 533.6102294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.08, "grad_norm": 0.007752260658890009, "learning_rate": 3.4722222222222224e-06, "loss": 0.0114, "num_tokens": 17032021.0, "reward": 1.4476063251495361, "reward_std": 0.2468341886997223, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.5827124714851379, "rewards/format_reward_step": 0.984375, "step": 75 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7110824059353472, "calib/avg_num_step_conf": 11.94921875, "calib/ece": 0.33240000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.059536301006889225, "calib/mean_conf": 0.26152, "calib/mu_c": 0.2858108108108108, "calib/mu_w": 0.22627450980392155, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00096, "calib/std_conf": 0.08311491803521195, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2689.0, "completions/max_terminated_length": 2689.0, "completions/mean_length": 626.265625, "completions/mean_terminated_length": 628.7216186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.08106666666666666, "grad_norm": 0.006362370681017637, "learning_rate": 3.444444444444445e-06, "loss": 0.0334, "num_tokens": 17295401.0, "reward": 1.3979976177215576, "reward_std": 0.26154667139053345, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6553703546524048, "rewards/format_reward_step": 0.9765625, "step": 76 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5586135895676047, "calib/avg_num_step_conf": 11.078125, "calib/ece": 0.342570281124498, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010699382292381543, "calib/mean_conf": 0.2857028112449799, "calib/mu_c": 0.28974193548387095, "calib/mu_w": 0.2790425531914894, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002891566265060241, "calib/std_conf": 0.0844709975663121, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2685.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 566.75, "completions/mean_terminated_length": 568.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.08213333333333334, "grad_norm": 0.007629550527781248, "learning_rate": 3.416666666666667e-06, "loss": 0.0344, "num_tokens": 17545153.0, "reward": 1.4076530933380127, "reward_std": 0.2604719400405884, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6317124962806702, "rewards/format_reward_step": 0.97265625, "step": 77 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6027968909276248, "calib/avg_num_step_conf": 11.125, "calib/ece": 0.2543083003952569, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020599515800203805, "calib/mean_conf": 0.3189723320158102, "calib/mu_c": 0.32784722222222223, "calib/mu_w": 0.30724770642201843, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0020553359683794467, "calib/std_conf": 0.08237417777032076, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1963.0, "completions/max_terminated_length": 1963.0, "completions/mean_length": 627.703125, "completions/mean_terminated_length": 630.1647338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 250.0, "epoch": 0.0832, "grad_norm": 0.007345384452491999, "learning_rate": 3.3888888888888893e-06, "loss": 0.0249, "num_tokens": 17813869.0, "reward": 1.4042229652404785, "reward_std": 0.27890828251838684, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6873522996902466, "rewards/format_reward_step": 0.98828125, "step": 78 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6281788079470199, "calib/avg_num_step_conf": 11.22265625, "calib/ece": 0.24366533864541837, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.032754304635761566, "calib/mean_conf": 0.35880478087649403, "calib/mu_c": 0.3718543046357615, "calib/mu_w": 0.33909999999999996, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00043824701195219337, "calib/std_conf": 0.08453862207631224, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2549.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 620.44140625, "completions/mean_terminated_length": 622.8745727539062, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.08426666666666667, "grad_norm": 0.006675400771200657, "learning_rate": 3.3611111111111117e-06, "loss": 0.0373, "num_tokens": 18079078.0, "reward": 1.428109884262085, "reward_std": 0.26775938272476196, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6960632801055908, "rewards/format_reward_step": 0.98046875, "step": 79 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5726342371079214, "calib/avg_num_step_conf": 11.06640625, "calib/ece": 0.2271713147410359, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013997873471557731, "calib/mean_conf": 0.38342629482071716, "calib/mu_c": 0.38894736842105265, "calib/mu_w": 0.3749494949494949, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00250996015936255, "calib/std_conf": 0.08255514603746796, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3008.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 583.53515625, "completions/mean_terminated_length": 583.53515625, "completions/min_length": 233.0, "completions/min_terminated_length": 233.0, "epoch": 0.08533333333333333, "grad_norm": 0.00843469426035881, "learning_rate": 3.3333333333333333e-06, "loss": 0.0404, "num_tokens": 18330623.0, "reward": 1.4328678846359253, "reward_std": 0.2943243980407715, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6977671980857849, "rewards/format_reward_step": 0.98046875, "step": 80 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5654385964912281, "calib/avg_num_step_conf": 11.47265625, "calib/ece": 0.21689795918367347, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": 0.02476140350877193, "calib/mean_conf": 0.407265306122449, "calib/mu_c": 0.41686666666666666, "calib/mu_w": 0.39210526315789473, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005959183673469389, "calib/std_conf": 0.08770236668907784, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 641.203125, "completions/mean_terminated_length": 648.8063354492188, "completions/min_length": 0.0, "completions/min_terminated_length": 233.0, "epoch": 0.0864, "grad_norm": 0.007108001969754696, "learning_rate": 3.3055555555555558e-06, "loss": 0.0315, "num_tokens": 18601019.0, "reward": 1.411208152770996, "reward_std": 0.2808418571949005, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6935101747512817, "rewards/format_reward_step": 0.95703125, "step": 81 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5786701777485188, "calib/avg_num_step_conf": 10.796875, "calib/ece": 0.18498023715415013, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.030245556287030906, "calib/mean_conf": 0.44189723320158103, "calib/mu_c": 0.45361290322580644, "calib/mu_w": 0.42336734693877554, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007114624505928853, "calib/std_conf": 0.09293819035007303, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2519.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 587.90234375, "completions/mean_terminated_length": 590.2078857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.08746666666666666, "grad_norm": 0.007196605671197176, "learning_rate": 3.277777777777778e-06, "loss": -0.0045, "num_tokens": 18857074.0, "reward": 1.4649035930633545, "reward_std": 0.2847653031349182, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7305883169174194, "rewards/format_reward_step": 0.98828125, "step": 82 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5513020833333333, "calib/avg_num_step_conf": 12.9765625, "calib/ece": 0.07092741935483868, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.017000000000000015, "calib/mean_conf": 0.5105241935483872, "calib/mu_c": 0.51875, "calib/mu_w": 0.50175, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.032661290322580645, "calib/std_conf": 0.10662021563668943, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2724.0, "completions/max_terminated_length": 2724.0, "completions/mean_length": 678.69140625, "completions/mean_terminated_length": 692.211181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.08853333333333334, "grad_norm": 0.006281640846282244, "learning_rate": 3.2500000000000002e-06, "loss": 0.0029, "num_tokens": 19138083.0, "reward": 1.3463736772537231, "reward_std": 0.3007776141166687, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7239972352981567, "rewards/format_reward_step": 0.96875, "step": 83 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.635747865891377, "calib/avg_num_step_conf": 9.83984375, "calib/ece": 0.05466666666666668, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04765000618582216, "calib/mean_conf": 0.5251764705882354, "calib/mu_c": 0.5472262773722628, "calib/mu_w": 0.49957627118644066, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.021294117647058824, "calib/std_conf": 0.084397915103501, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2376.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 573.56640625, "completions/mean_terminated_length": 573.56640625, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 0.0896, "grad_norm": 0.006773416418582201, "learning_rate": 3.2222222222222227e-06, "loss": 0.0459, "num_tokens": 19390836.0, "reward": 1.415609359741211, "reward_std": 0.2515171766281128, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7648124694824219, "rewards/format_reward_step": 0.99609375, "step": 84 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4925797503467406, "calib/avg_num_step_conf": 10.125, "calib/ece": 0.10993827160493824, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.00823045267489712, "calib/gap": -0.009399445214979263, "calib/mean_conf": 0.5405555555555557, "calib/mu_c": 0.5365714285714286, "calib/mu_w": 0.5459708737864079, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03718106995884774, "calib/std_conf": 0.08467682450974114, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2831.0, "completions/max_terminated_length": 2831.0, "completions/mean_length": 619.8359375, "completions/mean_terminated_length": 629.6746215820312, "completions/min_length": 0.0, "completions/min_terminated_length": 241.0, "epoch": 0.09066666666666667, "grad_norm": 0.00668597687035799, "learning_rate": 3.1944444444444443e-06, "loss": 0.0884, "num_tokens": 19657338.0, "reward": 1.3684334754943848, "reward_std": 0.34873247146606445, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7017108201980591, "rewards/format_reward_step": 0.94140625, "step": 85 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5235327743902439, "calib/avg_num_step_conf": 9.296875, "calib/ece": 0.04788844621513937, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.00867695630081311, "calib/mean_conf": 0.5462948207171315, "calib/mu_c": 0.550546875, "calib/mu_w": 0.5418699186991869, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.04211155378486048, "calib/std_conf": 0.0855694350816165, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2709.0, "completions/max_terminated_length": 2709.0, "completions/mean_length": 613.1640625, "completions/mean_terminated_length": 615.5686645507812, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.09173333333333333, "grad_norm": 0.006188141647726297, "learning_rate": 3.1666666666666667e-06, "loss": 0.006, "num_tokens": 19919820.0, "reward": 1.3382601737976074, "reward_std": 0.31673046946525574, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7155828475952148, "rewards/format_reward_step": 0.9609375, "step": 86 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4794137695978186, "calib/avg_num_step_conf": 8.921875, "calib/ece": 0.09019762845849802, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.0007171097477847166, "calib/mean_conf": 0.5555731225296443, "calib/mu_c": 0.5558282208588957, "calib/mu_w": 0.555111111111111, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0007509881422924868, "calib/std_conf": 0.0759977609726662, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2678.0, "completions/max_terminated_length": 2678.0, "completions/mean_length": 578.31640625, "completions/mean_terminated_length": 580.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.0928, "grad_norm": 0.006629766430705786, "learning_rate": 3.138888888888889e-06, "loss": 0.0131, "num_tokens": 20173365.0, "reward": 1.5051707029342651, "reward_std": 0.2687286138534546, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.7486226558685303, "rewards/format_reward_step": 0.98828125, "step": 87 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5916537166537167, "calib/avg_num_step_conf": 9.8984375, "calib/ece": 0.0627888446215139, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.02293900543900529, "calib/mean_conf": 0.5833466135458166, "calib/mu_c": 0.5932167832167831, "calib/mu_w": 0.5702777777777778, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03820717131474104, "calib/std_conf": 0.08415951569583457, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2345.0, "completions/max_terminated_length": 2345.0, "completions/mean_length": 683.08984375, "completions/mean_terminated_length": 685.7686767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 247.0, "epoch": 0.09386666666666667, "grad_norm": 0.005916224326938391, "learning_rate": 3.1111111111111116e-06, "loss": 0.0287, "num_tokens": 20458084.0, "reward": 1.417344093322754, "reward_std": 0.3309006094932556, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7409383058547974, "rewards/format_reward_step": 0.9765625, "step": 88 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5273050546101093, "calib/avg_num_step_conf": 9.9921875, "calib/ece": 0.13605577689243026, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.006394462788925415, "calib/mean_conf": 0.610796812749004, "calib/mu_c": 0.6140322580645161, "calib/mu_w": 0.6076377952755907, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12641434262948206, "calib/std_conf": 0.088092685648366, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2773.0, "completions/max_terminated_length": 2773.0, "completions/mean_length": 666.9140625, "completions/mean_terminated_length": 669.5294799804688, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.09493333333333333, "grad_norm": 0.00593179976567626, "learning_rate": 3.0833333333333336e-06, "loss": 0.0392, "num_tokens": 20737702.0, "reward": 1.3242459297180176, "reward_std": 0.31110048294067383, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7109917998313904, "rewards/format_reward_step": 0.96875, "step": 89 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4754514100223169, "calib/avg_num_step_conf": 10.07421875, "calib/ece": 0.0828174603174604, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": -0.007033881111787377, "calib/mean_conf": 0.6326587301587302, "calib/mu_c": 0.630062893081761, "calib/mu_w": 0.6370967741935484, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04226190476190482, "calib/std_conf": 0.09127228257604533, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2457.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 639.89453125, "completions/mean_terminated_length": 644.9330444335938, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.096, "grad_norm": 0.006317852530628443, "learning_rate": 3.055555555555556e-06, "loss": 0.0223, "num_tokens": 21004835.0, "reward": 1.4782195091247559, "reward_std": 0.3107277750968933, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7376890778541565, "rewards/format_reward_step": 0.9765625, "step": 90 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4880708929788684, "calib/avg_num_step_conf": 9.8046875, "calib/ece": 0.09620553359683795, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": -0.003746421267893596, "calib/mean_conf": 0.6722529644268775, "calib/mu_c": 0.670920245398773, "calib/mu_w": 0.6746666666666666, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.06209486166007907, "calib/std_conf": 0.1028029276716013, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2565.0, "completions/max_terminated_length": 2565.0, "completions/mean_length": 663.2265625, "completions/mean_terminated_length": 668.4487915039062, "completions/min_length": 0.0, "completions/min_terminated_length": 343.0, "epoch": 0.09706666666666666, "grad_norm": 0.005937293637543917, "learning_rate": 3.0277777777777776e-06, "loss": -0.0122, "num_tokens": 21282333.0, "reward": 1.4947071075439453, "reward_std": 0.3460769057273865, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.7394140362739563, "rewards/format_reward_step": 0.9765625, "step": 91 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5356643356643357, "calib/avg_num_step_conf": 9.9921875, "calib/ece": 0.13889328063241108, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.01579720279720276, "calib/mean_conf": 0.7041106719367588, "calib/mu_c": 0.7109790209790209, "calib/mu_w": 0.6951818181818181, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13889328063241108, "calib/std_conf": 0.10683600711255561, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2204.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 646.125, "completions/mean_terminated_length": 648.6588745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 245.0, "epoch": 0.09813333333333334, "grad_norm": 0.006593839265406132, "learning_rate": 3e-06, "loss": 0.0095, "num_tokens": 21554461.0, "reward": 1.4110760688781738, "reward_std": 0.37093257904052734, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.7205894589424133, "rewards/format_reward_step": 0.984375, "step": 92 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.376097057305111, "calib/avg_num_step_conf": 9.8671875, "calib/ece": 0.19332015810276676, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": -0.04713538977800724, "calib/mean_conf": 0.7196442687747036, "calib/mu_c": 0.7002684563758389, "calib/mu_w": 0.7474038461538461, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.16201581027667983, "calib/std_conf": 0.1155852607674471, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2327.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 680.26171875, "completions/mean_terminated_length": 680.26171875, "completions/min_length": 272.0, "completions/min_terminated_length": 272.0, "epoch": 0.0992, "grad_norm": 0.012477902695536613, "learning_rate": 2.9722222222222225e-06, "loss": 0.0395, "num_tokens": 21834384.0, "reward": 1.4113017320632935, "reward_std": 0.3995455503463745, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6858847141265869, "rewards/format_reward_step": 0.97265625, "step": 93 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6025943396226414, "calib/avg_num_step_conf": 9.1875, "calib/ece": 0.15650406504065043, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.024390243902439025, "calib/gap": 0.035385444743935346, "calib/mean_conf": 0.725609756097561, "calib/mu_c": 0.7408571428571429, "calib/mu_w": 0.7054716981132075, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.15650406504065043, "calib/std_conf": 0.08714203284614376, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2897.0, "completions/max_terminated_length": 2897.0, "completions/mean_length": 626.2734375, "completions/mean_terminated_length": 636.2142944335938, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.10026666666666667, "grad_norm": 0.006550987716764212, "learning_rate": 2.944444444444445e-06, "loss": 0.0304, "num_tokens": 22103390.0, "reward": 1.3804006576538086, "reward_std": 0.38110488653182983, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7061136960983276, "rewards/format_reward_step": 0.953125, "step": 94 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5633333333333334, "calib/avg_num_step_conf": 10.34375, "calib/ece": 0.12207843137254909, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09803921568627451, "calib/gap": 0.023333333333333428, "calib/mean_conf": 0.7684313725490197, "calib/mu_c": 0.7766666666666667, "calib/mu_w": 0.7533333333333333, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1217254901960785, "calib/std_conf": 0.09572371495088593, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 664.5703125, "completions/mean_terminated_length": 664.5703125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.10133333333333333, "grad_norm": 0.005554381292313337, "learning_rate": 2.916666666666667e-06, "loss": 0.0502, "num_tokens": 22379648.0, "reward": 1.5141574144363403, "reward_std": 0.3407779932022095, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7509710788726807, "rewards/format_reward_step": 0.98828125, "step": 95 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5033395176252319, "calib/avg_num_step_conf": 9.828125, "calib/ece": 0.0899424603174603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.09126984126984126, "calib/gap": 0.010525974025974105, "calib/mean_conf": 0.7712123015873016, "calib/mu_c": 0.7744285714285715, "calib/mu_w": 0.7639025974025974, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08335515873015874, "calib/std_conf": 0.10216301344872772, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2655.0, "completions/max_terminated_length": 2655.0, "completions/mean_length": 610.5390625, "completions/mean_terminated_length": 612.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 40.0, "epoch": 0.1024, "grad_norm": 0.007186867296695709, "learning_rate": 2.888888888888889e-06, "loss": -0.0203, "num_tokens": 22641762.0, "reward": 1.5587904453277588, "reward_std": 0.2784029245376587, "rewards/accuracy_reward_step": 0.6875, "rewards/final_brier_reward_step": 0.7621119618415833, "rewards/format_reward_step": 0.98046875, "step": 96 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.45097250167672703, "calib/avg_num_step_conf": 10.54296875, "calib/ece": 0.24398785425101216, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.13765182186234817, "calib/gap": -0.023768276324614313, "calib/mean_conf": 0.8047165991902834, "calib/mu_c": 0.7946126760563381, "calib/mu_w": 0.8183809523809524, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2369028340080972, "calib/std_conf": 0.08963330971406133, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 684.12109375, "completions/mean_terminated_length": 684.12109375, "completions/min_length": 213.0, "completions/min_terminated_length": 213.0, "epoch": 0.10346666666666667, "grad_norm": 0.005954548250883818, "learning_rate": 2.861111111111111e-06, "loss": 0.077, "num_tokens": 22921969.0, "reward": 1.3565677404403687, "reward_std": 0.4397056996822357, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6506354808807373, "rewards/format_reward_step": 0.953125, "step": 97 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4760621229623925, "calib/avg_num_step_conf": 9.5390625, "calib/ece": 0.22209486166007905, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09881422924901186, "calib/gap": -0.0055044281863689815, "calib/mean_conf": 0.8031225296442688, "calib/mu_c": 0.8008163265306122, "calib/mu_w": 0.8063207547169812, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22209486166007905, "calib/std_conf": 0.07154514872892766, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2874.0, "completions/max_terminated_length": 2874.0, "completions/mean_length": 654.6484375, "completions/mean_terminated_length": 657.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.10453333333333334, "grad_norm": 0.005915526766330004, "learning_rate": 2.8333333333333335e-06, "loss": -0.0004, "num_tokens": 23195743.0, "reward": 1.4139814376831055, "reward_std": 0.31640613079071045, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6912441253662109, "rewards/format_reward_step": 0.98828125, "step": 98 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5610806047047657, "calib/avg_num_step_conf": 10.80859375, "calib/ece": 0.43210483870967736, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.21774193548387097, "calib/gap": 0.017038709240051464, "calib/mean_conf": 0.8312983870967742, "calib/mu_c": 0.8415353535353535, "calib/mu_w": 0.824496644295302, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.43210483870967736, "calib/std_conf": 0.08582958357555616, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 757.18359375, "completions/mean_terminated_length": 766.162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.1056, "grad_norm": 0.005351560655981302, "learning_rate": 2.805555555555556e-06, "loss": 0.0249, "num_tokens": 23495382.0, "reward": 1.1461924314498901, "reward_std": 0.4089640974998474, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.5541036128997803, "rewards/format_reward_step": 0.96484375, "step": 99 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5243972445464983, "calib/avg_num_step_conf": 10.3515625, "calib/ece": 0.3070498007968127, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.23904382470119523, "calib/gap": 0.006796051792320235, "calib/mean_conf": 0.8330298804780877, "calib/mu_c": 0.8361977611940299, "calib/mu_w": 0.8294017094017097, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3031075697211155, "calib/std_conf": 0.10237179380531525, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2322.0, "completions/max_terminated_length": 2322.0, "completions/mean_length": 713.4375, "completions/mean_terminated_length": 716.2353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 290.0, "epoch": 0.10666666666666667, "grad_norm": 0.005961840972304344, "learning_rate": 2.7777777777777783e-06, "loss": 0.0355, "num_tokens": 23785430.0, "reward": 1.3258066177368164, "reward_std": 0.4145909547805786, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6320819854736328, "rewards/format_reward_step": 0.96484375, "step": 100 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5445255942084695, "calib/avg_num_step_conf": 12.74609375, "calib/ece": 0.354, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.372, "calib/gap": 0.013453776667307182, "calib/mean_conf": 0.87, "calib/mu_c": 0.8765116279069767, "calib/mu_w": 0.8630578512396695, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.354, "calib/std_conf": 0.08025459488403139, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2557.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 791.40234375, "completions/mean_terminated_length": 794.5059204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.10773333333333333, "grad_norm": 0.005466044880449772, "learning_rate": 2.7500000000000004e-06, "loss": 0.031, "num_tokens": 24095021.0, "reward": 1.2949095964431763, "reward_std": 0.42435914278030396, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6093503832817078, "rewards/format_reward_step": 0.97265625, "step": 101 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5355297157622739, "calib/avg_num_step_conf": 11.546875, "calib/ece": 0.19378418972332023, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.34782608695652173, "calib/gap": 0.010083304622451772, "calib/mean_conf": 0.8606822134387352, "calib/mu_c": 0.8639104651162791, "calib/mu_w": 0.8538271604938273, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18731225296442694, "calib/std_conf": 0.10375497530735353, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 678.75, "completions/mean_terminated_length": 678.75, "completions/min_length": 215.0, "completions/min_terminated_length": 215.0, "epoch": 0.1088, "grad_norm": 0.00616810005158186, "learning_rate": 2.7222222222222224e-06, "loss": 0.0061, "num_tokens": 24375477.0, "reward": 1.5273957252502441, "reward_std": 0.347787082195282, "rewards/accuracy_reward_step": 0.671875, "rewards/final_brier_reward_step": 0.7305728197097778, "rewards/format_reward_step": 0.98046875, "step": 102 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5376986690786922, "calib/avg_num_step_conf": 11.58984375, "calib/ece": 0.3079418326693227, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3784860557768924, "calib/gap": 0.015482484817159414, "calib/mean_conf": 0.8658828685258965, "calib/mu_c": 0.8726063380281688, "calib/mu_w": 0.8571238532110094, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3040438247011952, "calib/std_conf": 0.11550548211985302, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2639.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 799.2421875, "completions/mean_terminated_length": 805.535400390625, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.10986666666666667, "grad_norm": 0.005662613082677126, "learning_rate": 2.6944444444444444e-06, "loss": -0.0035, "num_tokens": 24684635.0, "reward": 1.3617302179336548, "reward_std": 0.30840951204299927, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.64142906665802, "rewards/format_reward_step": 0.97265625, "step": 103 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5894255050505051, "calib/avg_num_step_conf": 11.5703125, "calib/ece": 0.4127753968253968, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5198412698412699, "calib/gap": 0.02779439393939398, "calib/mean_conf": 0.8811293650793651, "calib/mu_c": 0.8956883333333334, "calib/mu_w": 0.8678939393939394, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40885714285714286, "calib/std_conf": 0.12274708386764051, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 698.78515625, "completions/mean_terminated_length": 704.2874145507812, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.11093333333333333, "grad_norm": 0.005730877164751291, "learning_rate": 2.666666666666667e-06, "loss": -0.0146, "num_tokens": 24970204.0, "reward": 1.2464662790298462, "reward_std": 0.3567374646663666, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5749638080596924, "rewards/format_reward_step": 0.98046875, "step": 104 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.43374999999999997, "calib/avg_num_step_conf": 12.66796875, "calib/ece": 0.41568119999999986, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.608, "calib/gap": 0.01303294871794869, "calib/mean_conf": 0.8956812, "calib/mu_c": 0.9024583333333334, "calib/mu_w": 0.8894253846153847, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.41568119999999986, "calib/std_conf": 0.13889352787858764, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2826.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 752.65234375, "completions/mean_terminated_length": 758.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 262.0, "epoch": 0.112, "grad_norm": 0.005838092416524887, "learning_rate": 2.6388888888888893e-06, "loss": 0.0502, "num_tokens": 25268643.0, "reward": 1.2328240871429443, "reward_std": 0.4923657774925232, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5515856742858887, "rewards/format_reward_step": 0.9765625, "step": 105 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5479192938209332, "calib/avg_num_step_conf": 12.51953125, "calib/ece": 0.4075412698412699, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.7063492063492064, "calib/gap": 0.01662219419924338, "calib/mean_conf": 0.9234142857142859, "calib/mu_c": 0.9314615384615385, "calib/mu_w": 0.9148393442622951, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4075412698412699, "calib/std_conf": 0.08077340705938965, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2771.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 714.91796875, "completions/mean_terminated_length": 717.7216186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 294.0, "epoch": 0.11306666666666666, "grad_norm": 0.0060480451211333275, "learning_rate": 2.6111111111111113e-06, "loss": 0.0325, "num_tokens": 25556246.0, "reward": 1.2863620519638062, "reward_std": 0.2843177318572998, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5766304135322571, "rewards/format_reward_step": 0.98046875, "step": 106 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.39340350877192987, "calib/avg_num_step_conf": 13.11328125, "calib/ece": 0.33019795918367345, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.7306122448979592, "calib/gap": -0.02555140350877183, "calib/mean_conf": 0.9202510204081633, "calib/mu_c": 0.9103433333333334, "calib/mu_w": 0.9358947368421052, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3191020408163265, "calib/std_conf": 0.12222855293269241, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2945.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 729.59375, "completions/mean_terminated_length": 741.1746215820312, "completions/min_length": 0.0, "completions/min_terminated_length": 270.0, "epoch": 0.11413333333333334, "grad_norm": 0.005714102182537317, "learning_rate": 2.5833333333333337e-06, "loss": 0.0317, "num_tokens": 25847638.0, "reward": 1.3682703971862793, "reward_std": 0.5019102096557617, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6115409135818481, "rewards/format_reward_step": 0.953125, "step": 107 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.519327731092437, "calib/avg_num_step_conf": 15.3046875, "calib/ece": 0.2787963562753036, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8704453441295547, "calib/gap": -0.010827341482047226, "calib/mean_conf": 0.9477947368421051, "calib/mu_c": 0.9444194117647059, "calib/mu_w": 0.9552467532467531, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26916599190283397, "calib/std_conf": 0.09214234901727124, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 808.41015625, "completions/mean_terminated_length": 817.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.1152, "grad_norm": 0.005012467037886381, "learning_rate": 2.5555555555555557e-06, "loss": 0.0467, "num_tokens": 26157823.0, "reward": 1.4845738410949707, "reward_std": 0.42959311604499817, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6800853610038757, "rewards/format_reward_step": 0.9609375, "step": 108 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5711924907456373, "calib/avg_num_step_conf": 14.2109375, "calib/ece": 0.45188130081300815, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.8699186991869918, "calib/gap": 0.023055962453728207, "calib/mean_conf": 0.9399479674796747, "calib/mu_c": 0.9515696721311475, "calib/mu_w": 0.9285137096774193, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.44794715447154476, "calib/std_conf": 0.11181130580413126, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2734.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 782.02734375, "completions/mean_terminated_length": 788.18505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 291.0, "epoch": 0.11626666666666667, "grad_norm": 0.005501654930412769, "learning_rate": 2.5277777777777778e-06, "loss": 0.0401, "num_tokens": 26462622.0, "reward": 1.2221993207931519, "reward_std": 0.30708974599838257, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5303360223770142, "rewards/format_reward_step": 0.9609375, "step": 109 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.44938884644767, "calib/avg_num_step_conf": 11.86328125, "calib/ece": 0.42629402390438237, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8725099601593626, "calib/gap": -0.009233906289788707, "calib/mean_conf": 0.9443035856573706, "calib/mu_c": 0.9399257575757576, "calib/mu_w": 0.9491596638655463, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4223505976095617, "calib/std_conf": 0.07210492870889737, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 708.6640625, "completions/mean_terminated_length": 708.6640625, "completions/min_length": 230.0, "completions/min_terminated_length": 230.0, "epoch": 0.11733333333333333, "grad_norm": 0.0059098368510603905, "learning_rate": 2.5e-06, "loss": 0.055, "num_tokens": 26748960.0, "reward": 1.2810180187225342, "reward_std": 0.4634544253349304, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5542237758636475, "rewards/format_reward_step": 0.9765625, "step": 110 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47879353233830846, "calib/avg_num_step_conf": 12.7890625, "calib/ece": 0.43950433070866135, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8976377952755905, "calib/gap": -0.015294900497512587, "calib/mean_conf": 0.9358893700787401, "calib/mu_c": 0.9286634328358208, "calib/mu_w": 0.9439583333333333, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4239173228346456, "calib/std_conf": 0.1372516999493022, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2435.0, "completions/max_terminated_length": 2435.0, "completions/mean_length": 712.34375, "completions/mean_terminated_length": 715.1372680664062, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.1184, "grad_norm": 0.005665524862706661, "learning_rate": 2.4722222222222226e-06, "loss": 0.027, "num_tokens": 27038728.0, "reward": 1.2901296615600586, "reward_std": 0.3616183400154114, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5490092039108276, "rewards/format_reward_step": 0.984375, "step": 111 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5591823491239455, "calib/avg_num_step_conf": 13.7421875, "calib/ece": 0.4081048192771083, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9076305220883534, "calib/gap": 0.035507482154445213, "calib/mean_conf": 0.9391240963855422, "calib/mu_c": 0.9555231343283582, "calib/mu_w": 0.920015652173913, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4045381526104416, "calib/std_conf": 0.13779499754009591, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 755.484375, "completions/mean_terminated_length": 764.4426879882812, "completions/min_length": 0.0, "completions/min_terminated_length": 277.0, "epoch": 0.11946666666666667, "grad_norm": 0.005625101737678051, "learning_rate": 2.4444444444444447e-06, "loss": 0.0066, "num_tokens": 27340052.0, "reward": 1.294386386871338, "reward_std": 0.47233307361602783, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5731478929519653, "rewards/format_reward_step": 0.96875, "step": 112 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5480958738635641, "calib/avg_num_step_conf": 12.6796875, "calib/ece": 0.3921555118110236, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9448818897637795, "calib/gap": -0.009733772013478359, "calib/mean_conf": 0.9474507874015748, "calib/mu_c": 0.9433503401360546, "calib/mu_w": 0.9530841121495329, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3804330708661417, "calib/std_conf": 0.10971966272798954, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2566.0, "completions/max_terminated_length": 2566.0, "completions/mean_length": 671.3515625, "completions/mean_terminated_length": 671.3515625, "completions/min_length": 278.0, "completions/min_terminated_length": 278.0, "epoch": 0.12053333333333334, "grad_norm": 0.0059531074948608875, "learning_rate": 2.4166666666666667e-06, "loss": -0.007, "num_tokens": 27617118.0, "reward": 1.3696892261505127, "reward_std": 0.4173819124698639, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5987532734870911, "rewards/format_reward_step": 0.9921875, "step": 113 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5548615676820805, "calib/avg_num_step_conf": 12.5859375, "calib/ece": 0.2909612000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.948, "calib/gap": 0.013789853166776278, "calib/mean_conf": 0.9548812, "calib/mu_c": 0.9593491124260355, "calib/mu_w": 0.9455592592592592, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28492120000000004, "calib/std_conf": 0.09330122317826278, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 644.24609375, "completions/mean_terminated_length": 654.4722900390625, "completions/min_length": 0.0, "completions/min_terminated_length": 251.0, "epoch": 0.1216, "grad_norm": 0.005589292850345373, "learning_rate": 2.388888888888889e-06, "loss": 0.003, "num_tokens": 27887069.0, "reward": 1.488504409790039, "reward_std": 0.39160531759262085, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.6840401887893677, "rewards/format_reward_step": 0.97265625, "step": 114 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5957170668397144, "calib/avg_num_step_conf": 12.88671875, "calib/ece": 0.4991056224899598, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9156626506024096, "calib/gap": 0.007179935107073376, "calib/mean_conf": 0.9393682730923694, "calib/mu_c": 0.9432321739130435, "calib/mu_w": 0.9360522388059701, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4883132530120482, "calib/std_conf": 0.1402333299352498, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2767.0, "completions/max_terminated_length": 2767.0, "completions/mean_length": 661.30859375, "completions/mean_terminated_length": 669.1502075195312, "completions/min_length": 0.0, "completions/min_terminated_length": 268.0, "epoch": 0.12266666666666666, "grad_norm": 0.006005452014505863, "learning_rate": 2.361111111111111e-06, "loss": 0.013, "num_tokens": 28161628.0, "reward": 1.1822772026062012, "reward_std": 0.46157434582710266, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.4934607148170471, "rewards/format_reward_step": 0.97265625, "step": 115 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5137931034482758, "calib/avg_num_step_conf": 13.109375, "calib/ece": 0.4208764940239045, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9362549800796812, "calib/gap": 0.006124521072796929, "calib/mean_conf": 0.9587250996015936, "calib/mu_c": 0.9615555555555554, "calib/mu_w": 0.9554310344827585, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4208764940239045, "calib/std_conf": 0.06891278719175344, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 712.015625, "completions/mean_terminated_length": 720.4585571289062, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.12373333333333333, "grad_norm": 0.005244911182671785, "learning_rate": 2.3333333333333336e-06, "loss": 0.0089, "num_tokens": 28448424.0, "reward": 1.2982820272445679, "reward_std": 0.3899441361427307, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5614078044891357, "rewards/format_reward_step": 0.98046875, "step": 116 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49435936219827314, "calib/avg_num_step_conf": 13.55078125, "calib/ece": 0.4792376984126985, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9603174603174603, "calib/gap": -0.00806044620911317, "calib/mean_conf": 0.9594765873015872, "calib/mu_c": 0.9553504065040651, "calib/mu_w": 0.9634108527131783, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4753095238095239, "calib/std_conf": 0.06617931904534732, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 691.11328125, "completions/mean_terminated_length": 691.11328125, "completions/min_length": 280.0, "completions/min_terminated_length": 280.0, "epoch": 0.1248, "grad_norm": 0.005855880212038755, "learning_rate": 2.305555555555556e-06, "loss": 0.0302, "num_tokens": 28731949.0, "reward": 1.2283642292022705, "reward_std": 0.4078208804130554, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.511415958404541, "rewards/format_reward_step": 0.984375, "step": 117 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47035748180955395, "calib/avg_num_step_conf": 13.6015625, "calib/ece": 0.3901968503937007, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.8937007874015748, "calib/gap": -0.0076817462828217, "calib/mean_conf": 0.9540551181102362, "calib/mu_c": 0.9507586206896552, "calib/mu_w": 0.9584403669724769, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3866929133858267, "calib/std_conf": 0.06832423937669099, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2630.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 721.875, "completions/mean_terminated_length": 721.875, "completions/min_length": 282.0, "completions/min_terminated_length": 282.0, "epoch": 0.12586666666666665, "grad_norm": 0.005256311967968941, "learning_rate": 2.277777777777778e-06, "loss": 0.0107, "num_tokens": 29020757.0, "reward": 1.3600351810455322, "reward_std": 0.31913137435913086, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5950707197189331, "rewards/format_reward_step": 0.9921875, "step": 118 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4731182795698925, "calib/avg_num_step_conf": 13.8359375, "calib/ece": 0.4785492, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.892, "calib/gap": -0.023923758320532618, "calib/mean_conf": 0.9348108, "calib/mu_c": 0.9227532258064516, "calib/mu_w": 0.9466769841269842, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.45868, "calib/std_conf": 0.15223729688666968, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2573.0, "completions/max_terminated_length": 2573.0, "completions/mean_length": 711.60546875, "completions/mean_terminated_length": 717.2086791992188, "completions/min_length": 0.0, "completions/min_terminated_length": 231.0, "epoch": 0.12693333333333334, "grad_norm": 0.006077486090362072, "learning_rate": 2.25e-06, "loss": 0.0325, "num_tokens": 29307992.0, "reward": 1.2276971340179443, "reward_std": 0.46560826897621155, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5100818276405334, "rewards/format_reward_step": 0.9765625, "step": 119 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5806209150326798, "calib/avg_num_step_conf": 12.56640625, "calib/ece": 0.34963730158730155, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8849206349206349, "calib/gap": 0.040613098039215756, "calib/mean_conf": 0.9286960317460315, "calib/mu_c": 0.9451346666666668, "calib/mu_w": 0.904521568627451, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.34154761904761904, "calib/std_conf": 0.14665715061163162, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2417.0, "completions/max_terminated_length": 2417.0, "completions/mean_length": 671.71484375, "completions/mean_terminated_length": 674.3490600585938, "completions/min_length": 0.0, "completions/min_terminated_length": 300.0, "epoch": 0.128, "grad_norm": 0.005706976167857647, "learning_rate": 2.222222222222222e-06, "loss": 0.0572, "num_tokens": 29586639.0, "reward": 1.3960473537445068, "reward_std": 0.37202006578445435, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6358448266983032, "rewards/format_reward_step": 0.984375, "step": 120 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5235796387520525, "calib/avg_num_step_conf": 13.55859375, "calib/ece": 0.3735147999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.892, "calib/gap": 0.012961346469622548, "calib/mean_conf": 0.9445652, "calib/mu_c": 0.9500089655172415, "calib/mu_w": 0.9370476190476189, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3690399999999999, "calib/std_conf": 0.0975125310355546, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 700.7578125, "completions/mean_terminated_length": 714.7171630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.12906666666666666, "grad_norm": 0.00541492085903883, "learning_rate": 2.1944444444444445e-06, "loss": -0.0367, "num_tokens": 29871089.0, "reward": 1.355072259902954, "reward_std": 0.44197413325309753, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6046760678291321, "rewards/format_reward_step": 0.97265625, "step": 121 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5131178958498546, "calib/avg_num_step_conf": 11.46875, "calib/ece": 0.33924387351778645, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/gap": 0.004811994448850387, "calib/mean_conf": 0.934503162055336, "calib/mu_c": 0.9363480769230771, "calib/mu_w": 0.9315360824742267, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.32857312252964416, "calib/std_conf": 0.12044349268728828, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2472.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 666.66015625, "completions/mean_terminated_length": 666.66015625, "completions/min_length": 282.0, "completions/min_terminated_length": 282.0, "epoch": 0.13013333333333332, "grad_norm": 0.005828322377055883, "learning_rate": 2.166666666666667e-06, "loss": 0.0381, "num_tokens": 30149098.0, "reward": 1.4132450819015503, "reward_std": 0.28737032413482666, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6311777234077454, "rewards/format_reward_step": 0.9765625, "step": 122 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5482349942653243, "calib/avg_num_step_conf": 12.73046875, "calib/ece": 0.4184075697211155, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8764940239043825, "calib/gap": 0.020026131005479897, "calib/mean_conf": 0.9303597609561753, "calib/mu_c": 0.9397744360902255, "calib/mu_w": 0.9197483050847456, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40944342629482067, "calib/std_conf": 0.13480568224847467, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2813.0, "completions/max_terminated_length": 2813.0, "completions/mean_length": 747.79296875, "completions/mean_terminated_length": 753.6810913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 333.0, "epoch": 0.1312, "grad_norm": 0.0056311506778001785, "learning_rate": 2.138888888888889e-06, "loss": 0.013, "num_tokens": 30445821.0, "reward": 1.2952358722686768, "reward_std": 0.5198529958724976, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5709406137466431, "rewards/format_reward_step": 0.98046875, "step": 123 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5005504334663549, "calib/avg_num_step_conf": 11.125, "calib/ece": 0.2975486274509806, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9215686274509803, "calib/gap": 0.0043435117655155064, "calib/mean_conf": 0.9525298039215686, "calib/mu_c": 0.953994674556213, "calib/mu_w": 0.9496511627906975, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29366666666666685, "calib/std_conf": 0.09058859889070567, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2449.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 649.234375, "completions/mean_terminated_length": 649.234375, "completions/min_length": 267.0, "completions/min_terminated_length": 267.0, "epoch": 0.13226666666666667, "grad_norm": 0.0056220246478915215, "learning_rate": 2.1111111111111114e-06, "loss": 0.007, "num_tokens": 30718841.0, "reward": 1.4999858140945435, "reward_std": 0.350582480430603, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.6835654973983765, "rewards/format_reward_step": 0.99609375, "step": 124 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4194749581083597, "calib/avg_num_step_conf": 11.40234375, "calib/ece": 0.4318712598425197, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8858267716535433, "calib/gap": 0.004049258362812558, "calib/mean_conf": 0.9476192913385828, "calib/mu_c": 0.9495801526717558, "calib/mu_w": 0.9455308943089432, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4318712598425197, "calib/std_conf": 0.09168418653680209, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2271.0, "completions/max_terminated_length": 2271.0, "completions/mean_length": 671.03515625, "completions/mean_terminated_length": 673.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.13333333333333333, "grad_norm": 0.005516288802027702, "learning_rate": 2.0833333333333334e-06, "loss": -0.0022, "num_tokens": 30995434.0, "reward": 1.2792880535125732, "reward_std": 0.4651259183883667, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5507635474205017, "rewards/format_reward_step": 0.984375, "step": 125 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5485616010006255, "calib/avg_num_step_conf": 11.75390625, "calib/ece": 0.48078458498023713, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.8972332015810277, "calib/gap": -0.005174796747967503, "calib/mean_conf": 0.9434841897233202, "calib/mu_c": 0.9408252032520326, "calib/mu_w": 0.9460000000000001, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4690513833992095, "calib/std_conf": 0.12410852698103679, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 694.0390625, "completions/mean_terminated_length": 694.0390625, "completions/min_length": 286.0, "completions/min_terminated_length": 286.0, "epoch": 0.1344, "grad_norm": 0.0055323997512459755, "learning_rate": 2.0555555555555555e-06, "loss": 0.0193, "num_tokens": 31278572.0, "reward": 1.2310841083526611, "reward_std": 0.33184194564819336, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5168557167053223, "rewards/format_reward_step": 0.984375, "step": 126 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49639408002006774, "calib/avg_num_step_conf": 11.26171875, "calib/ece": 0.4167588932806323, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.849802371541502, "calib/gap": 0.010983945817133023, "calib/mean_conf": 0.9410276679841898, "calib/mu_c": 0.9461940298507463, "calib/mu_w": 0.9352100840336133, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4140711462450592, "calib/std_conf": 0.10207981079543622, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 605.3984375, "completions/mean_terminated_length": 610.1653442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.13546666666666668, "grad_norm": 0.006298969965428114, "learning_rate": 2.027777777777778e-06, "loss": -0.0012, "num_tokens": 31537226.0, "reward": 1.302546501159668, "reward_std": 0.4561861753463745, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5699367523193359, "rewards/format_reward_step": 0.98828125, "step": 127 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49363779527559054, "calib/avg_num_step_conf": 9.62109375, "calib/ece": 0.440357142857143, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.8650793650793651, "calib/gap": 0.01874204724409445, "calib/mean_conf": 0.9443253968253968, "calib/mu_c": 0.9536220472440944, "calib/mu_w": 0.9348799999999999, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.440357142857143, "calib/std_conf": 0.08828132605259412, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2822.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 650.828125, "completions/mean_terminated_length": 655.9527587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.13653333333333334, "grad_norm": 0.0057527353055775166, "learning_rate": 2.0000000000000003e-06, "loss": 0.0029, "num_tokens": 31810502.0, "reward": 1.2602683305740356, "reward_std": 0.4331348240375519, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5478804707527161, "rewards/format_reward_step": 0.98046875, "step": 128 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6097074468085107, "calib/avg_num_step_conf": 10.48828125, "calib/ece": 0.3249862204724409, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9015748031496063, "calib/gap": 0.03441356382978744, "calib/mean_conf": 0.9410767716535433, "calib/mu_c": 0.9538125000000001, "calib/mu_w": 0.9193989361702126, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31807086614173224, "calib/std_conf": 0.134027670091693, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2093.0, "completions/max_terminated_length": 2093.0, "completions/mean_length": 579.73046875, "completions/mean_terminated_length": 582.0039672851562, "completions/min_length": 0.0, "completions/min_terminated_length": 248.0, "epoch": 0.1376, "grad_norm": 0.005614932626485825, "learning_rate": 1.9722222222222224e-06, "loss": -0.0057, "num_tokens": 32061297.0, "reward": 1.4486931562423706, "reward_std": 0.31342941522598267, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.659105122089386, "rewards/format_reward_step": 0.98828125, "step": 129 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5679012345679012, "calib/avg_num_step_conf": 9.734375, "calib/ece": 0.32929296875000014, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.91015625, "calib/gap": 0.013321907013396306, "calib/mean_conf": 0.95108984375, "calib/mu_c": 0.9559814814814814, "calib/mu_w": 0.9426595744680851, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.3237851562500001, "calib/std_conf": 0.09305150064265802, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 583.7109375, "completions/mean_terminated_length": 586.0000610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.13866666666666666, "grad_norm": 0.005677498877048492, "learning_rate": 1.944444444444445e-06, "loss": -0.0185, "num_tokens": 32316015.0, "reward": 1.4557148218154907, "reward_std": 0.33224961161613464, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6575233936309814, "rewards/format_reward_step": 0.98828125, "step": 130 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5884824518042511, "calib/avg_num_step_conf": 8.578125, "calib/ece": 0.47670509803921574, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.8745098039215686, "calib/gap": 0.02879873949579803, "calib/mean_conf": 0.9433717647058824, "calib/mu_c": 0.9587310924369746, "calib/mu_w": 0.9299323529411766, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.47670509803921574, "calib/std_conf": 0.09973483071460103, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 558.2265625, "completions/mean_terminated_length": 558.2265625, "completions/min_length": 235.0, "completions/min_terminated_length": 235.0, "epoch": 0.13973333333333332, "grad_norm": 0.006135825999081135, "learning_rate": 1.916666666666667e-06, "loss": 0.0184, "num_tokens": 32565129.0, "reward": 1.2259846925735474, "reward_std": 0.2734970450401306, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5261881947517395, "rewards/format_reward_step": 0.99609375, "step": 131 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44361143150070487, "calib/avg_num_step_conf": 11.02734375, "calib/ece": 0.36406784313725493, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9058823529411765, "calib/gap": -0.0017071895424835004, "calib/mean_conf": 0.9563070588235294, "calib/mu_c": 0.955624183006536, "calib/mu_w": 0.9573313725490195, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3601874509803922, "calib/std_conf": 0.08176965848169004, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 652.91015625, "completions/mean_terminated_length": 652.91015625, "completions/min_length": 226.0, "completions/min_terminated_length": 226.0, "epoch": 0.1408, "grad_norm": 0.005297274794429541, "learning_rate": 1.888888888888889e-06, "loss": 0.0382, "num_tokens": 32837866.0, "reward": 1.399484395980835, "reward_std": 0.4310172200202942, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6153749227523804, "rewards/format_reward_step": 0.98828125, "step": 132 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5575564971751412, "calib/avg_num_step_conf": 10.48046875, "calib/ece": 0.48669360000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.924, "calib/gap": 0.010572688751926074, "calib/mean_conf": 0.9507744, "calib/mu_c": 0.956356779661017, "calib/mu_w": 0.9457840909090909, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.48273400000000005, "calib/std_conf": 0.10007373953560444, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 680.65625, "completions/mean_terminated_length": 686.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 272.0, "epoch": 0.14186666666666667, "grad_norm": 0.005452999845147133, "learning_rate": 1.8611111111111113e-06, "loss": 0.0281, "num_tokens": 33118458.0, "reward": 1.2015693187713623, "reward_std": 0.4506909251213074, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5047012567520142, "rewards/format_reward_step": 0.9765625, "step": 133 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5606641123882503, "calib/avg_num_step_conf": 10.28125, "calib/ece": 0.40577091633466145, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.8685258964143426, "calib/gap": 0.029709316730523683, "calib/mean_conf": 0.935902390438247, "calib/mu_c": 0.9496325925925926, "calib/mu_w": 0.9199232758620689, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.40191235059760966, "calib/std_conf": 0.12729638062483245, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2269.0, "completions/max_terminated_length": 2269.0, "completions/mean_length": 673.69921875, "completions/mean_terminated_length": 679.00390625, "completions/min_length": 0.0, "completions/min_terminated_length": 267.0, "epoch": 0.14293333333333333, "grad_norm": 0.005468668416142464, "learning_rate": 1.8333333333333333e-06, "loss": 0.0097, "num_tokens": 33399877.0, "reward": 1.2890727519989014, "reward_std": 0.4803275167942047, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5664268136024475, "rewards/format_reward_step": 0.95703125, "step": 134 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5161622879946792, "calib/avg_num_step_conf": 10.34765625, "calib/ece": 0.35341269841269835, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9325396825396826, "calib/gap": -0.0033269038909209403, "calib/mean_conf": 0.9596031746031746, "calib/mu_c": 0.9583225806451614, "calib/mu_w": 0.9616494845360823, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3489682539682539, "calib/std_conf": 0.06291718913249854, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 627.140625, "completions/mean_terminated_length": 629.6000366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.144, "grad_norm": 0.005314249079674482, "learning_rate": 1.8055555555555557e-06, "loss": 0.0276, "num_tokens": 33666305.0, "reward": 1.41217041015625, "reward_std": 0.3732190430164337, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6290280818939209, "rewards/format_reward_step": 0.984375, "step": 135 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4833798449612403, "calib/avg_num_step_conf": 10.05859375, "calib/ece": 0.479782283464567, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9566929133858267, "calib/gap": -0.0059959255813955, "calib/mean_conf": 0.9563515748031497, "calib/mu_c": 0.9533063999999999, "calib/mu_w": 0.9593023255813954, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.47200393700787413, "calib/std_conf": 0.10249409613053076, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 607.0859375, "completions/mean_terminated_length": 607.0859375, "completions/min_length": 246.0, "completions/min_terminated_length": 246.0, "epoch": 0.14506666666666668, "grad_norm": 0.005611001048237085, "learning_rate": 1.777777777777778e-06, "loss": 0.0166, "num_tokens": 33930207.0, "reward": 1.2385427951812744, "reward_std": 0.41841921210289, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5122418403625488, "rewards/format_reward_step": 0.98046875, "step": 136 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5514235544964254, "calib/avg_num_step_conf": 10.6875, "calib/ece": 0.43175889328063244, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9169960474308301, "calib/gap": 0.01304151511350804, "calib/mean_conf": 0.9614031620553359, "calib/mu_c": 0.9675373134328357, "calib/mu_w": 0.9544957983193276, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43175889328063244, "calib/std_conf": 0.0681815432199302, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2791.0, "completions/max_terminated_length": 2791.0, "completions/mean_length": 624.45703125, "completions/mean_terminated_length": 624.45703125, "completions/min_length": 259.0, "completions/min_terminated_length": 259.0, "epoch": 0.14613333333333334, "grad_norm": 0.004993810784071684, "learning_rate": 1.75e-06, "loss": 0.0374, "num_tokens": 34197052.0, "reward": 1.2950917482376099, "reward_std": 0.3534542918205261, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5589334964752197, "rewards/format_reward_step": 0.984375, "step": 137 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5824561403508771, "calib/avg_num_step_conf": 9.80078125, "calib/ece": 0.4074015748031496, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9291338582677166, "calib/gap": 0.026552631578947383, "calib/mean_conf": 0.9585826771653543, "calib/mu_c": 0.9705, "calib/mu_w": 0.9439473684210526, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4074015748031496, "calib/std_conf": 0.08088194380563357, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1978.0, "completions/max_terminated_length": 1978.0, "completions/mean_length": 596.3125, "completions/mean_terminated_length": 596.3125, "completions/min_length": 213.0, "completions/min_terminated_length": 213.0, "epoch": 0.1472, "grad_norm": 0.007067054510116577, "learning_rate": 1.7222222222222224e-06, "loss": 0.0312, "num_tokens": 34454044.0, "reward": 1.341176986694336, "reward_std": 0.48980024456977844, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5886040925979614, "rewards/format_reward_step": 0.9921875, "step": 138 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.554967105263158, "calib/avg_num_step_conf": 9.2265625, "calib/ece": 0.3339145098039215, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9647058823529412, "calib/gap": 0.028209736842105593, "calib/mean_conf": 0.9613654901960783, "calib/mu_c": 0.971875, "calib/mu_w": 0.9436652631578945, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3339145098039215, "calib/std_conf": 0.08533164312772462, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1365.0, "completions/max_terminated_length": 1365.0, "completions/mean_length": 557.48828125, "completions/mean_terminated_length": 559.674560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 221.0, "epoch": 0.14826666666666666, "grad_norm": 0.005635618232190609, "learning_rate": 1.6944444444444446e-06, "loss": 0.0034, "num_tokens": 34699857.0, "reward": 1.4520823955535889, "reward_std": 0.3601229190826416, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.6580711007118225, "rewards/format_reward_step": 0.99609375, "step": 139 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5678921568627451, "calib/avg_num_step_conf": 9.73046875, "calib/ece": 0.3163177865612648, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9762845849802372, "calib/gap": -0.0021319327731089688, "calib/mean_conf": 0.9647019762845851, "calib/mu_c": 0.9639857142857143, "calib/mu_w": 0.9661176470588233, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.30849407114624505, "calib/std_conf": 0.08933853961267735, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 607.1875, "completions/mean_terminated_length": 609.5686645507812, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.14933333333333335, "grad_norm": 0.005778952967375517, "learning_rate": 1.6666666666666667e-06, "loss": -0.0124, "num_tokens": 34960313.0, "reward": 1.485205888748169, "reward_std": 0.32548052072525024, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.6696308255195618, "rewards/format_reward_step": 0.98828125, "step": 140 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6089225240168636, "calib/avg_num_step_conf": 11.28125, "calib/ece": 0.33322000000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.956, "calib/gap": 0.02745559471974568, "calib/mean_conf": 0.9692200000000001, "calib/mu_c": 0.9792138364779874, "calib/mu_w": 0.9517582417582418, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.33322000000000007, "calib/std_conf": 0.061144841156061565, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2215.0, "completions/max_terminated_length": 2215.0, "completions/mean_length": 672.8515625, "completions/mean_terminated_length": 680.830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 255.0, "epoch": 0.1504, "grad_norm": 0.004524087067693472, "learning_rate": 1.638888888888889e-06, "loss": -0.0155, "num_tokens": 35239659.0, "reward": 1.4273971319198608, "reward_std": 0.28316599130630493, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.647763192653656, "rewards/format_reward_step": 0.96484375, "step": 141 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5219184027777778, "calib/avg_num_step_conf": 11.35546875, "calib/ece": 0.48552637795275594, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9763779527559056, "calib/gap": -0.016247284226190484, "calib/mean_conf": 0.9697885826771654, "calib/mu_c": 0.9617289062500001, "calib/mu_w": 0.9779761904761906, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4756889763779528, "calib/std_conf": 0.09118466460222968, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 666.45703125, "completions/mean_terminated_length": 669.0706176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 244.0, "epoch": 0.15146666666666667, "grad_norm": 0.005157998763024807, "learning_rate": 1.6111111111111113e-06, "loss": 0.014, "num_tokens": 35515432.0, "reward": 1.248449683189392, "reward_std": 0.31556376814842224, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.5086181163787842, "rewards/format_reward_step": 0.98828125, "step": 142 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5193288590604027, "calib/avg_num_step_conf": 11.20703125, "calib/ece": 0.396444076305221, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9558232931726908, "calib/gap": -0.015604161073825673, "calib/mean_conf": 0.9514675702811246, "calib/mu_c": 0.9452008389261745, "calib/mu_w": 0.9608050000000001, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.37475903614457845, "calib/std_conf": 0.16117320310964403, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 677.8125, "completions/mean_terminated_length": 677.8125, "completions/min_length": 287.0, "completions/min_terminated_length": 287.0, "epoch": 0.15253333333333333, "grad_norm": 0.004777104593813419, "learning_rate": 1.5833333333333333e-06, "loss": 0.07, "num_tokens": 35796288.0, "reward": 1.3570444583892822, "reward_std": 0.4084314703941345, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5812764763832092, "rewards/format_reward_step": 0.96875, "step": 143 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5465800273597812, "calib/avg_num_step_conf": 11.62890625, "calib/ece": 0.3121875000000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.98046875, "calib/gap": 0.011863201094391207, "calib/mean_conf": 0.9762500000000001, "calib/mu_c": 0.980235294117647, "calib/mu_w": 0.9683720930232558, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3121875000000001, "calib/std_conf": 0.058563533875612396, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1914.0, "completions/max_terminated_length": 1914.0, "completions/mean_length": 648.1875, "completions/mean_terminated_length": 650.7294311523438, "completions/min_length": 0.0, "completions/min_terminated_length": 281.0, "epoch": 0.1536, "grad_norm": 0.00496349623426795, "learning_rate": 1.5555555555555558e-06, "loss": -0.0065, "num_tokens": 36066352.0, "reward": 1.502691388130188, "reward_std": 0.3970443606376648, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6811640858650208, "rewards/format_reward_step": 0.99609375, "step": 144 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5132053687400778, "calib/avg_num_step_conf": 12.39453125, "calib/ece": 0.3027482071713149, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9641434262948207, "calib/gap": 0.022921150238129684, "calib/mean_conf": 0.9606780876494024, "calib/mu_c": 0.9681662721893491, "calib/mu_w": 0.9452451219512195, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2950597609561755, "calib/std_conf": 0.13028630149214565, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2736.0, "completions/max_terminated_length": 2736.0, "completions/mean_length": 669.22265625, "completions/mean_terminated_length": 671.8471069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 254.0, "epoch": 0.15466666666666667, "grad_norm": 0.004919286817312241, "learning_rate": 1.527777777777778e-06, "loss": 0.0783, "num_tokens": 36340377.0, "reward": 1.4850218296051025, "reward_std": 0.48813188076019287, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.6731687784194946, "rewards/format_reward_step": 0.9765625, "step": 145 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5369150246305419, "calib/avg_num_step_conf": 10.796875, "calib/ece": 0.5162109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.95703125, "calib/gap": 0.0185541871921181, "calib/mean_conf": 0.9693359375000001, "calib/mu_c": 0.9794827586206896, "calib/mu_w": 0.9609285714285715, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5162109375, "calib/std_conf": 0.08117273031009918, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1681.0, "completions/max_terminated_length": 1681.0, "completions/mean_length": 630.99609375, "completions/mean_terminated_length": 633.4706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 283.0, "epoch": 0.15573333333333333, "grad_norm": 0.005344673991203308, "learning_rate": 1.5e-06, "loss": 0.0048, "num_tokens": 36609128.0, "reward": 1.1972899436950684, "reward_std": 0.4087854027748108, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.48833006620407104, "rewards/format_reward_step": 1.0, "step": 146 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5084745762711864, "calib/avg_num_step_conf": 12.453125, "calib/ece": 0.5325149606299213, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9566929133858267, "calib/gap": -0.02970306580259241, "calib/mean_conf": 0.9614141732283465, "calib/mu_c": 0.9455101694915253, "calib/mu_w": 0.9752132352941177, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5146811023622048, "calib/std_conf": 0.13888806274243998, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 692.4921875, "completions/mean_terminated_length": 692.4921875, "completions/min_length": 232.0, "completions/min_terminated_length": 232.0, "epoch": 0.1568, "grad_norm": 0.005198162980377674, "learning_rate": 1.4722222222222225e-06, "loss": 0.0296, "num_tokens": 36890086.0, "reward": 1.1903597116470337, "reward_std": 0.3408493995666504, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.46665698289871216, "rewards/format_reward_step": 0.9921875, "step": 147 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5111999423797176, "calib/avg_num_step_conf": 10.98828125, "calib/ece": 0.2854609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.9921875, "calib/gap": 0.007035148372227296, "calib/mean_conf": 0.9767890625000002, "calib/mu_c": 0.9789325842696629, "calib/mu_w": 0.9718974358974356, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28346875, "calib/std_conf": 0.06638796393451973, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1346.0, "completions/max_terminated_length": 1346.0, "completions/mean_length": 609.5234375, "completions/mean_terminated_length": 611.9137573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.15786666666666666, "grad_norm": 0.006085440516471863, "learning_rate": 1.4444444444444445e-06, "loss": 0.0038, "num_tokens": 37151236.0, "reward": 1.5469896793365479, "reward_std": 0.3135468661785126, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7072604894638062, "rewards/format_reward_step": 0.99609375, "step": 148 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5226355611601512, "calib/avg_num_step_conf": 12.81640625, "calib/ece": 0.4488531746031745, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9484126984126984, "calib/gap": 0.03717339218158888, "calib/mean_conf": 0.9612341269841269, "calib/mu_c": 0.9792307692307693, "calib/mu_w": 0.9420573770491805, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.44710714285714276, "calib/std_conf": 0.12230895674005653, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 707.796875, "completions/mean_terminated_length": 716.1897583007812, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.15893333333333334, "grad_norm": 0.0053066350519657135, "learning_rate": 1.4166666666666667e-06, "loss": -0.0465, "num_tokens": 37436888.0, "reward": 1.2656923532485962, "reward_std": 0.3937472105026245, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5391972661018372, "rewards/format_reward_step": 0.9765625, "step": 149 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5774102079395086, "calib/avg_num_step_conf": 11.36328125, "calib/ece": 0.43694822134387373, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9723320158102767, "calib/gap": 0.008735362318840756, "calib/mean_conf": 0.9613916996047432, "calib/mu_c": 0.9653623188405799, "calib/mu_w": 0.9566269565217391, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4264426877470358, "calib/std_conf": 0.1391820122709795, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2024.0, "completions/max_terminated_length": 2024.0, "completions/mean_length": 616.73046875, "completions/mean_terminated_length": 624.0435180664062, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.16, "grad_norm": 0.006326043512672186, "learning_rate": 1.3888888888888892e-06, "loss": -0.0309, "num_tokens": 37699731.0, "reward": 1.3119094371795654, "reward_std": 0.4123223125934601, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5574127435684204, "rewards/format_reward_step": 0.98828125, "step": 150 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.551948051948052, "calib/avg_num_step_conf": 12.76171875, "calib/ece": 0.537176, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.964, "calib/gap": 0.006650000000000045, "calib/mean_conf": 0.9771760000000002, "calib/mu_c": 0.9808999999999999, "calib/mu_w": 0.9742499999999998, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.537176, "calib/std_conf": 0.03331019399523215, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2972.0, "completions/max_terminated_length": 2972.0, "completions/mean_length": 721.48828125, "completions/mean_terminated_length": 727.1693115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 329.0, "epoch": 0.16106666666666666, "grad_norm": 0.004929741378873587, "learning_rate": 1.3611111111111112e-06, "loss": 0.0403, "num_tokens": 37991456.0, "reward": 1.1352100372314453, "reward_std": 0.42847442626953125, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.4501076936721802, "rewards/format_reward_step": 0.9609375, "step": 151 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5288242566204698, "calib/avg_num_step_conf": 12.8671875, "calib/ece": 0.5419180000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.968, "calib/gap": 0.005414177890558758, "calib/mean_conf": 0.9701620000000001, "calib/mu_c": 0.9732155963302751, "calib/mu_w": 0.9678014184397163, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.5380400000000001, "calib/std_conf": 0.101737577895289, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 694.65234375, "completions/mean_terminated_length": 700.1220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 285.0, "epoch": 0.16213333333333332, "grad_norm": 0.005465598776936531, "learning_rate": 1.3333333333333334e-06, "loss": 0.01, "num_tokens": 38274679.0, "reward": 1.1406099796295166, "reward_std": 0.4606872797012329, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.4491886496543884, "rewards/format_reward_step": 0.97265625, "step": 152 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5483958122255995, "calib/avg_num_step_conf": 14.19921875, "calib/ece": 0.40882113821138233, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.967479674796748, "calib/gap": 0.001979736575481228, "calib/mean_conf": 0.9753252032520326, "calib/mu_c": 0.9761702127659575, "calib/mu_w": 0.9741904761904763, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.405487804878049, "calib/std_conf": 0.058104335079578955, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 742.359375, "completions/mean_terminated_length": 760.176025390625, "completions/min_length": 0.0, "completions/min_terminated_length": 319.0, "epoch": 0.1632, "grad_norm": 0.005114991217851639, "learning_rate": 1.3055555555555556e-06, "loss": -0.0446, "num_tokens": 38572043.0, "reward": 1.3114057779312134, "reward_std": 0.4579676389694214, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5642179250717163, "rewards/format_reward_step": 0.95703125, "step": 153 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5874609130706692, "calib/avg_num_step_conf": 12.41015625, "calib/ece": 0.4963003952569171, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9762845849802372, "calib/gap": 0.007604940587867426, "calib/mean_conf": 0.973612648221344, "calib/mu_c": 0.977520325203252, "calib/mu_w": 0.9699153846153846, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.49187351778656135, "calib/std_conf": 0.09395912069491512, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2010.0, "completions/max_terminated_length": 2010.0, "completions/mean_length": 664.15625, "completions/mean_terminated_length": 669.3858032226562, "completions/min_length": 0.0, "completions/min_terminated_length": 300.0, "epoch": 0.16426666666666667, "grad_norm": 0.005510884802788496, "learning_rate": 1.2777777777777779e-06, "loss": 0.0088, "num_tokens": 38846507.0, "reward": 1.2234225273132324, "reward_std": 0.4460986852645874, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5015326738357544, "rewards/format_reward_step": 0.984375, "step": 154 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5880905511811023, "calib/avg_num_step_conf": 11.30078125, "calib/ece": 0.4793854901960785, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9529411764705882, "calib/gap": 0.012873252952755654, "calib/mean_conf": 0.9646145098039216, "calib/mu_c": 0.9710763779527557, "calib/mu_w": 0.958203125, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.47298039215686283, "calib/std_conf": 0.11337729821375137, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1501.0, "completions/max_terminated_length": 1501.0, "completions/mean_length": 646.44921875, "completions/mean_terminated_length": 648.984375, "completions/min_length": 0.0, "completions/min_terminated_length": 330.0, "epoch": 0.16533333333333333, "grad_norm": 0.0056763202883303165, "learning_rate": 1.25e-06, "loss": 0.0139, "num_tokens": 39119214.0, "reward": 1.2522039413452148, "reward_std": 0.3926677703857422, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5200329422950745, "rewards/format_reward_step": 0.9921875, "step": 155 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.531457916155287, "calib/avg_num_step_conf": 12.58984375, "calib/ece": 0.4307556, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.956, "calib/gap": 0.008647910341709197, "calib/mean_conf": 0.9708364000000002, "calib/mu_c": 0.9747452554744526, "calib/mu_w": 0.9660973451327434, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.426796, "calib/std_conf": 0.09248727736851162, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 739.82421875, "completions/mean_terminated_length": 739.82421875, "completions/min_length": 269.0, "completions/min_terminated_length": 269.0, "epoch": 0.1664, "grad_norm": 0.004702582489699125, "learning_rate": 1.2222222222222223e-06, "loss": 0.0402, "num_tokens": 39413369.0, "reward": 1.289594292640686, "reward_std": 0.3829977810382843, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5440323948860168, "rewards/format_reward_step": 0.95703125, "step": 156 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5537280701754386, "calib/avg_num_step_conf": 14.6171875, "calib/ece": 0.3767529880478087, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9721115537848606, "calib/gap": 0.007298644338117999, "calib/mean_conf": 0.9724501992031873, "calib/mu_c": 0.975328947368421, "calib/mu_w": 0.968030303030303, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37181274900398403, "calib/std_conf": 0.08823226608698645, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2668.0, "completions/max_terminated_length": 2668.0, "completions/mean_length": 750.16015625, "completions/mean_terminated_length": 750.16015625, "completions/min_length": 271.0, "completions/min_terminated_length": 271.0, "epoch": 0.16746666666666668, "grad_norm": 0.004526448901742697, "learning_rate": 1.1944444444444446e-06, "loss": 0.0226, "num_tokens": 39709138.0, "reward": 1.3890341520309448, "reward_std": 0.38522738218307495, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6100994944572449, "rewards/format_reward_step": 0.98046875, "step": 157 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5598376623376624, "calib/avg_num_step_conf": 11.703125, "calib/ece": 0.3835826771653544, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9763779527559056, "calib/gap": -0.007674025974026, "calib/mean_conf": 0.9770472440944882, "calib/mu_c": 0.974025974025974, "calib/mu_w": 0.9817, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3771653543307087, "calib/std_conf": 0.07349233805130645, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 687.171875, "completions/mean_terminated_length": 687.171875, "completions/min_length": 286.0, "completions/min_terminated_length": 286.0, "epoch": 0.16853333333333334, "grad_norm": 0.004906547721475363, "learning_rate": 1.1666666666666668e-06, "loss": 0.0358, "num_tokens": 39990294.0, "reward": 1.4026451110839844, "reward_std": 0.3692792057991028, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6099777221679688, "rewards/format_reward_step": 0.9921875, "step": 158 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5207578358446182, "calib/avg_num_step_conf": 12.1953125, "calib/ece": 0.41045275590551195, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9763779527559056, "calib/gap": -0.005047682624451766, "calib/mean_conf": 0.981003937007874, "calib/mu_c": 0.9788775510204082, "calib/mu_w": 0.98392523364486, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.40635826771653555, "calib/std_conf": 0.03595540742479003, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2050.0, "completions/max_terminated_length": 2050.0, "completions/mean_length": 680.3984375, "completions/mean_terminated_length": 683.0667114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 265.0, "epoch": 0.1696, "grad_norm": 0.005360930692404509, "learning_rate": 1.138888888888889e-06, "loss": -0.0095, "num_tokens": 40269260.0, "reward": 1.352133870124817, "reward_std": 0.32214194536209106, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5753616094589233, "rewards/format_reward_step": 0.98046875, "step": 159 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5068873221047134, "calib/avg_num_step_conf": 13.7421875, "calib/ece": 0.43642369477911636, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.9879518072289156, "calib/gap": -0.00706844888366609, "calib/mean_conf": 0.981487951807229, "calib/mu_c": 0.9783369565217391, "calib/mu_w": 0.9854054054054052, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4318473895582328, "calib/std_conf": 0.06343509992924913, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 756.81640625, "completions/mean_terminated_length": 765.79052734375, "completions/min_length": 0.0, "completions/min_terminated_length": 327.0, "epoch": 0.17066666666666666, "grad_norm": 0.004979386460036039, "learning_rate": 1.111111111111111e-06, "loss": 0.012, "num_tokens": 40567845.0, "reward": 1.2913702726364136, "reward_std": 0.42069438099861145, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5397717952728271, "rewards/format_reward_step": 0.96484375, "step": 160 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5072447447447448, "calib/avg_num_step_conf": 12.1015625, "calib/ece": 0.28397598425196857, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9645669291338582, "calib/gap": -0.0014889339339341223, "calib/mean_conf": 0.9723232283464568, "calib/mu_c": 0.9718894444444443, "calib/mu_w": 0.9733783783783784, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2738188976377953, "calib/std_conf": 0.09951728123091752, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1905.0, "completions/max_terminated_length": 1905.0, "completions/mean_length": 668.1484375, "completions/mean_terminated_length": 673.409423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 268.0, "epoch": 0.17173333333333332, "grad_norm": 0.0053687444888055325, "learning_rate": 1.0833333333333335e-06, "loss": 0.0337, "num_tokens": 40842811.0, "reward": 1.5395169258117676, "reward_std": 0.32825204730033875, "rewards/accuracy_reward_step": 0.703125, "rewards/final_brier_reward_step": 0.6962214708328247, "rewards/format_reward_step": 0.9765625, "step": 161 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4775086505190311, "calib/avg_num_step_conf": 13.33984375, "calib/ece": 0.315764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9764705882352941, "calib/gap": 0.016235294117647348, "calib/mean_conf": 0.9743529411764705, "calib/mu_c": 0.979764705882353, "calib/mu_w": 0.9635294117647056, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3117254901960785, "calib/std_conf": 0.10062682438329817, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1819.0, "completions/max_terminated_length": 1819.0, "completions/mean_length": 708.5, "completions/mean_terminated_length": 711.2785034179688, "completions/min_length": 0.0, "completions/min_terminated_length": 328.0, "epoch": 0.1728, "grad_norm": 0.00477219931781292, "learning_rate": 1.0555555555555557e-06, "loss": 0.0052, "num_tokens": 41128331.0, "reward": 1.4949811697006226, "reward_std": 0.33206310868263245, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.6735562086105347, "rewards/format_reward_step": 0.98828125, "step": 162 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6011690647482015, "calib/avg_num_step_conf": 13.21484375, "calib/ece": 0.4339760956175301, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9760956175298805, "calib/gap": -0.0008663283658788545, "calib/mean_conf": 0.9803505976095618, "calib/mu_c": 0.9799640287769784, "calib/mu_w": 0.9808303571428573, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4302709163346615, "calib/std_conf": 0.060573065010961304, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2771.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 743.7265625, "completions/mean_terminated_length": 749.5827026367188, "completions/min_length": 0.0, "completions/min_terminated_length": 281.0, "epoch": 0.17386666666666667, "grad_norm": 0.004371706862002611, "learning_rate": 1.0277777777777777e-06, "loss": 0.0162, "num_tokens": 41423557.0, "reward": 1.3090941905975342, "reward_std": 0.34967905282974243, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5556883811950684, "rewards/format_reward_step": 0.9765625, "step": 163 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4665488347735009, "calib/avg_num_step_conf": 14.33984375, "calib/ece": 0.4535483870967743, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.967741935483871, "calib/gap": -0.019934537837130284, "calib/mean_conf": 0.9737903225806454, "calib/mu_c": 0.9646268656716417, "calib/mu_w": 0.984561403508772, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4435080645161292, "calib/std_conf": 0.09169631453824054, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 778.26953125, "completions/mean_terminated_length": 790.623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 391.0, "epoch": 0.17493333333333333, "grad_norm": 0.005314142443239689, "learning_rate": 1.0000000000000002e-06, "loss": 0.0191, "num_tokens": 41728930.0, "reward": 1.2675137519836426, "reward_std": 0.4203830361366272, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5272148251533508, "rewards/format_reward_step": 0.9609375, "step": 164 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5550458715596329, "calib/avg_num_step_conf": 12.74609375, "calib/ece": 0.553452380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.9682539682539683, "calib/gap": -0.006286649130685773, "calib/mean_conf": 0.967420634920635, "calib/mu_c": 0.9638532110091743, "calib/mu_w": 0.9701398601398601, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5441666666666667, "calib/std_conf": 0.12179907036777422, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 740.8125, "completions/mean_terminated_length": 746.6456909179688, "completions/min_length": 0.0, "completions/min_terminated_length": 272.0, "epoch": 0.176, "grad_norm": 0.005232121329754591, "learning_rate": 9.722222222222224e-07, "loss": 0.0414, "num_tokens": 42024154.0, "reward": 1.1319527626037598, "reward_std": 0.3429035246372223, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.43578046560287476, "rewards/format_reward_step": 0.9765625, "step": 165 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5238732708612227, "calib/avg_num_step_conf": 12.90625, "calib/ece": 0.31248097165991906, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.9635627530364372, "calib/gap": -0.007145381526104666, "calib/mean_conf": 0.9765311740890688, "calib/mu_c": 0.9741879518072288, "calib/mu_w": 0.9813333333333335, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30847368421052634, "calib/std_conf": 0.06627414997796408, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 759.67578125, "completions/mean_terminated_length": 768.683837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 380.0, "epoch": 0.17706666666666668, "grad_norm": 0.004472343251109123, "learning_rate": 9.444444444444445e-07, "loss": 0.0544, "num_tokens": 42324815.0, "reward": 1.4526236057281494, "reward_std": 0.3695136606693268, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6552475094795227, "rewards/format_reward_step": 0.953125, "step": 166 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5892742453436095, "calib/avg_num_step_conf": 13.49609375, "calib/ece": 0.2994488188976378, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9881889763779528, "calib/gap": 0.019660315421394348, "calib/mean_conf": 0.9805511811023622, "calib/mu_c": 0.9868208092485548, "calib/mu_w": 0.9671604938271604, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2994488188976378, "calib/std_conf": 0.05669099953025043, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2116.0, "completions/max_terminated_length": 2116.0, "completions/mean_length": 740.69140625, "completions/mean_terminated_length": 746.5236206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 363.0, "epoch": 0.17813333333333334, "grad_norm": 0.004243503324687481, "learning_rate": 9.166666666666666e-07, "loss": 0.0064, "num_tokens": 42620040.0, "reward": 1.5143901109695435, "reward_std": 0.30221521854400635, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.692842960357666, "rewards/format_reward_step": 0.984375, "step": 167 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4753119913185025, "calib/avg_num_step_conf": 13.609375, "calib/ece": 0.36682730923694784, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9839357429718876, "calib/gap": 0.015961068909386866, "calib/mean_conf": 0.9772690763052209, "calib/mu_c": 0.9834868421052633, "calib/mu_w": 0.9675257731958764, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36682730923694784, "calib/std_conf": 0.07769632399670272, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 782.87109375, "completions/mean_terminated_length": 795.2976684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 277.0, "epoch": 0.1792, "grad_norm": 0.004571868572384119, "learning_rate": 8.88888888888889e-07, "loss": 0.0065, "num_tokens": 42925127.0, "reward": 1.3821645975112915, "reward_std": 0.4252123534679413, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6080793142318726, "rewards/format_reward_step": 0.96875, "step": 168 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.573837358684481, "calib/avg_num_step_conf": 12.76171875, "calib/ece": 0.4312709163346614, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9362549800796812, "calib/gap": 0.004950089928057344, "calib/mean_conf": 0.9405537848605579, "calib/mu_c": 0.9427625899280574, "calib/mu_w": 0.9378125, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.40901992031872514, "calib/std_conf": 0.19500897950419732, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2873.0, "completions/max_terminated_length": 2873.0, "completions/mean_length": 767.828125, "completions/mean_terminated_length": 773.8740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 239.0, "epoch": 0.18026666666666666, "grad_norm": 0.005218811333179474, "learning_rate": 8.611111111111112e-07, "loss": 0.0088, "num_tokens": 43225875.0, "reward": 1.3115191459655762, "reward_std": 0.36574727296829224, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5566322207450867, "rewards/format_reward_step": 0.98046875, "step": 169 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5763374485596708, "calib/avg_num_step_conf": 14.75, "calib/ece": 0.34105158730158724, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.9801587301587301, "calib/gap": 0.009709876543210094, "calib/mean_conf": 0.9839087301587301, "calib/mu_c": 0.9873765432098767, "calib/mu_w": 0.9776666666666666, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.34105158730158724, "calib/std_conf": 0.025344836109849323, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2817.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 787.78125, "completions/mean_terminated_length": 793.9842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 395.0, "epoch": 0.18133333333333335, "grad_norm": 0.004583289381116629, "learning_rate": 8.333333333333333e-07, "loss": -0.0022, "num_tokens": 43531699.0, "reward": 1.4404544830322266, "reward_std": 0.4104905426502228, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6426275968551636, "rewards/format_reward_step": 0.97265625, "step": 170 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.563218031968032, "calib/avg_num_step_conf": 12.4375, "calib/ece": 0.5390980392156863, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.984313725490196, "calib/gap": 0.013037587412587648, "calib/mean_conf": 0.9783137254901961, "calib/mu_c": 0.9856250000000001, "calib/mu_w": 0.9725874125874124, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5390980392156863, "calib/std_conf": 0.06584518092333413, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1634.0, "completions/max_terminated_length": 1634.0, "completions/mean_length": 705.03515625, "completions/mean_terminated_length": 707.800048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 312.0, "epoch": 0.1824, "grad_norm": 0.005645171273499727, "learning_rate": 8.055555555555557e-07, "loss": 0.0225, "num_tokens": 43819084.0, "reward": 1.1672158241271973, "reward_std": 0.4768272042274475, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.46333789825439453, "rewards/format_reward_step": 0.99609375, "step": 171 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5296686746987951, "calib/avg_num_step_conf": 13.9140625, "calib/ece": 0.3175487804878051, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9552845528455285, "calib/gap": -0.010208885542168478, "calib/mean_conf": 0.9712235772357725, "calib/mu_c": 0.9679036144578315, "calib/mu_w": 0.9781124999999999, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.30698780487804905, "calib/std_conf": 0.09647310386484517, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2968.0, "completions/max_terminated_length": 2968.0, "completions/mean_length": 714.68359375, "completions/mean_terminated_length": 728.9203491210938, "completions/min_length": 0.0, "completions/min_terminated_length": 332.0, "epoch": 0.18346666666666667, "grad_norm": 0.004878144711256027, "learning_rate": 7.777777777777779e-07, "loss": 0.0115, "num_tokens": 44105395.0, "reward": 1.445297360420227, "reward_std": 0.3872257471084595, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6445009708404541, "rewards/format_reward_step": 0.94921875, "step": 172 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4876770152505447, "calib/avg_num_step_conf": 13.21875, "calib/ece": 0.3702610441767071, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9759036144578314, "calib/gap": 0.0055453431372546325, "calib/mean_conf": 0.9758032128514057, "calib/mu_c": 0.9779411764705882, "calib/mu_w": 0.9723958333333336, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3658032128514059, "calib/std_conf": 0.08385274330610008, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2552.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 758.0703125, "completions/mean_terminated_length": 764.0393676757812, "completions/min_length": 0.0, "completions/min_terminated_length": 279.0, "epoch": 0.18453333333333333, "grad_norm": 0.0059276907704770565, "learning_rate": 7.5e-07, "loss": 0.0431, "num_tokens": 44402621.0, "reward": 1.3855817317962646, "reward_std": 0.3574729561805725, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6071008443832397, "rewards/format_reward_step": 0.96875, "step": 173 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5388752723311546, "calib/avg_num_step_conf": 13.3515625, "calib/ece": 0.41520491803278686, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.9590163934426229, "calib/gap": 0.025065359477124516, "calib/mean_conf": 0.9725819672131147, "calib/mu_c": 0.9836764705882355, "calib/mu_w": 0.958611111111111, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.41520491803278686, "calib/std_conf": 0.08657587785080939, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3015.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 818.7421875, "completions/mean_terminated_length": 831.7381591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 369.0, "epoch": 0.1856, "grad_norm": 0.0047086006961762905, "learning_rate": 7.222222222222222e-07, "loss": 0.0036, "num_tokens": 44716451.0, "reward": 1.282027006149292, "reward_std": 0.5039170980453491, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5562413930892944, "rewards/format_reward_step": 0.9453125, "step": 174 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5316115702479338, "calib/avg_num_step_conf": 13.703125, "calib/ece": 0.5224586776859506, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.9545454545454546, "calib/gap": 0.009106060606060562, "calib/mean_conf": 0.9680785123966944, "calib/mu_c": 0.9730454545454545, "calib/mu_w": 0.963939393939394, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.5179958677685952, "calib/std_conf": 0.0969031106005789, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3007.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 788.08203125, "completions/mean_terminated_length": 806.9960327148438, "completions/min_length": 0.0, "completions/min_terminated_length": 332.0, "epoch": 0.18666666666666668, "grad_norm": 0.004811226390302181, "learning_rate": 6.944444444444446e-07, "loss": -0.012, "num_tokens": 45024024.0, "reward": 1.128366231918335, "reward_std": 0.40532705187797546, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.4559510350227356, "rewards/format_reward_step": 0.94140625, "step": 175 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5092234824878474, "calib/avg_num_step_conf": 13.39453125, "calib/ece": 0.4220392156862746, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9921568627450981, "calib/gap": 0.017257260376417616, "calib/mean_conf": 0.9789019607843139, "calib/mu_c": 0.9865492957746477, "calib/mu_w": 0.9692920353982301, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4220392156862746, "calib/std_conf": 0.08299876089364717, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2317.0, "completions/max_terminated_length": 2317.0, "completions/mean_length": 770.65234375, "completions/mean_terminated_length": 773.674560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 293.0, "epoch": 0.18773333333333334, "grad_norm": 0.00466120382770896, "learning_rate": 6.666666666666667e-07, "loss": 0.0033, "num_tokens": 45325375.0, "reward": 1.3379881381988525, "reward_std": 0.4600631594657898, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5744138360023499, "rewards/format_reward_step": 0.9921875, "step": 176 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5415127357573402, "calib/avg_num_step_conf": 12.40625, "calib/ece": 0.42604, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.98, "calib/gap": 0.0025024304880421067, "calib/mean_conf": 0.98004, "calib/mu_c": 0.9811510791366906, "calib/mu_w": 0.9786486486486485, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.42504, "calib/std_conf": 0.026525429308495652, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2635.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 726.93359375, "completions/mean_terminated_length": 735.5534057617188, "completions/min_length": 0.0, "completions/min_terminated_length": 379.0, "epoch": 0.1888, "grad_norm": 0.00513953622430563, "learning_rate": 6.388888888888889e-07, "loss": 0.0125, "num_tokens": 45615302.0, "reward": 1.3037654161453247, "reward_std": 0.335764080286026, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5528433322906494, "rewards/format_reward_step": 0.96875, "step": 177 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5136054421768707, "calib/avg_num_step_conf": 12.98828125, "calib/ece": 0.3749003984063746, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9880478087649402, "calib/gap": 0.000624249699880064, "calib/mean_conf": 0.9844621513944224, "calib/mu_c": 0.9847058823529413, "calib/mu_w": 0.9840816326530613, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3749003984063746, "calib/std_conf": 0.015968393887289656, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 736.109375, "completions/mean_terminated_length": 738.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 333.0, "epoch": 0.18986666666666666, "grad_norm": 0.004597798455506563, "learning_rate": 6.111111111111112e-07, "loss": 0.042, "num_tokens": 45909818.0, "reward": 1.3925690650939941, "reward_std": 0.4295831024646759, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6093569993972778, "rewards/format_reward_step": 0.98046875, "step": 178 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6071184821184822, "calib/avg_num_step_conf": 12.37109375, "calib/ece": 0.38009176788124155, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.9554655870445344, "calib/gap": 0.015821184821184886, "calib/mean_conf": 0.9738839406207827, "calib/mu_c": 0.9802252252252251, "calib/mu_w": 0.9644040404040403, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3773927125506073, "calib/std_conf": 0.05663986846172311, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 749.6796875, "completions/mean_terminated_length": 761.5794067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 331.0, "epoch": 0.19093333333333334, "grad_norm": 0.005119526293128729, "learning_rate": 5.833333333333334e-07, "loss": 0.0142, "num_tokens": 46208000.0, "reward": 1.3591045141220093, "reward_std": 0.4644799828529358, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6010216474533081, "rewards/format_reward_step": 0.9609375, "step": 179 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5116135328562135, "calib/avg_num_step_conf": 14.2734375, "calib/ece": 0.4028685258964144, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9760956175298805, "calib/gap": 0.008314248536109181, "calib/mean_conf": 0.9805577689243029, "calib/mu_c": 0.9840689655172413, "calib/mu_w": 0.9757547169811321, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4028685258964144, "calib/std_conf": 0.0643856010835871, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2615.0, "completions/max_terminated_length": 2615.0, "completions/mean_length": 811.69140625, "completions/mean_terminated_length": 818.0827026367188, "completions/min_length": 0.0, "completions/min_terminated_length": 347.0, "epoch": 0.192, "grad_norm": 0.005072887521237135, "learning_rate": 5.555555555555555e-07, "loss": 0.0256, "num_tokens": 46519649.0, "reward": 1.3476653099060059, "reward_std": 0.31839191913604736, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5820492506027222, "rewards/format_reward_step": 0.98046875, "step": 180 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5585551806024247, "calib/avg_num_step_conf": 12.28515625, "calib/ece": 0.47905138339920955, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9762845849802372, "calib/gap": 0.006711348581427412, "calib/mean_conf": 0.9810276679841897, "calib/mu_c": 0.9843700787401575, "calib/mu_w": 0.9776587301587301, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.47905138339920955, "calib/std_conf": 0.019528311901133372, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2601.0, "completions/max_terminated_length": 2601.0, "completions/mean_length": 724.99609375, "completions/mean_terminated_length": 730.7047119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 366.0, "epoch": 0.19306666666666666, "grad_norm": 0.005564880557358265, "learning_rate": 5.277777777777779e-07, "loss": -0.019, "num_tokens": 46811512.0, "reward": 1.24691903591156, "reward_std": 0.421791672706604, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5172755718231201, "rewards/format_reward_step": 0.984375, "step": 181 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5355902777777777, "calib/avg_num_step_conf": 13.62890625, "calib/ece": 0.3652579365079366, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9880952380952381, "calib/gap": 0.0014022435897435015, "calib/mean_conf": 0.9843055555555557, "calib/mu_c": 0.9848397435897436, "calib/mu_w": 0.9834375000000001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3652579365079366, "calib/std_conf": 0.015046682941715125, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2640.0, "completions/max_terminated_length": 2640.0, "completions/mean_length": 757.38671875, "completions/mean_terminated_length": 766.3676147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 377.0, "epoch": 0.19413333333333332, "grad_norm": 0.004975506104528904, "learning_rate": 5.000000000000001e-07, "loss": -0.0333, "num_tokens": 47111563.0, "reward": 1.4083222150802612, "reward_std": 0.34334421157836914, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6174256801605225, "rewards/format_reward_step": 0.98046875, "step": 182 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.500189393939394, "calib/avg_num_step_conf": 13.6015625, "calib/ece": 0.4109251968503936, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.968503937007874, "calib/gap": 0.0006250000000000977, "calib/mean_conf": 0.9778543307086615, "calib/mu_c": 0.9781250000000001, "calib/mu_w": 0.9775, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4109251968503936, "calib/std_conf": 0.029806172419749376, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 782.46484375, "completions/mean_terminated_length": 782.46484375, "completions/min_length": 330.0, "completions/min_terminated_length": 330.0, "epoch": 0.1952, "grad_norm": 0.005101703107357025, "learning_rate": 4.7222222222222226e-07, "loss": 0.0187, "num_tokens": 47418554.0, "reward": 1.348827600479126, "reward_std": 0.4252949655056, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5804678201675415, "rewards/format_reward_step": 0.9921875, "step": 183 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5591919609057746, "calib/avg_num_step_conf": 13.23828125, "calib/ece": 0.32352362204724405, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.9763779527559056, "calib/gap": 0.004149287631633247, "calib/mean_conf": 0.981003937007874, "calib/mu_c": 0.9824251497005988, "calib/mu_w": 0.9782758620689656, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32352362204724405, "calib/std_conf": 0.020075545909267006, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2541.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 757.546875, "completions/mean_terminated_length": 760.5177001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 272.0, "epoch": 0.19626666666666667, "grad_norm": 0.004986181855201721, "learning_rate": 4.444444444444445e-07, "loss": 0.0007, "num_tokens": 47717766.0, "reward": 1.4816131591796875, "reward_std": 0.2942894697189331, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.6663514971733093, "rewards/format_reward_step": 0.9921875, "step": 184 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6218245614035087, "calib/avg_num_step_conf": 14.19921875, "calib/ece": 0.3641224489795918, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.8979591836734694, "calib/gap": 0.008778947368420775, "calib/mean_conf": 0.9637959183673469, "calib/mu_c": 0.9671999999999997, "calib/mu_w": 0.958421052631579, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.35783673469387745, "calib/std_conf": 0.07952331954016516, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 822.60546875, "completions/mean_terminated_length": 822.60546875, "completions/min_length": 311.0, "completions/min_terminated_length": 311.0, "epoch": 0.19733333333333333, "grad_norm": 0.004520988091826439, "learning_rate": 4.1666666666666667e-07, "loss": 0.0194, "num_tokens": 48035273.0, "reward": 1.3592960834503174, "reward_std": 0.3480474054813385, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6014046669006348, "rewards/format_reward_step": 0.9453125, "step": 185 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5727453399122807, "calib/avg_num_step_conf": 14.375, "calib/ece": 0.3745927419354841, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.9637096774193549, "calib/gap": -0.00312609649122797, "calib/mean_conf": 0.9729798387096775, "calib/mu_c": 0.9717697368421054, "calib/mu_w": 0.9748958333333334, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.36733467741935505, "calib/std_conf": 0.08144211528385956, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2744.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 796.85546875, "completions/mean_terminated_length": 806.3043823242188, "completions/min_length": 0.0, "completions/min_terminated_length": 311.0, "epoch": 0.1984, "grad_norm": 0.004679948557168245, "learning_rate": 3.8888888888888895e-07, "loss": 0.0232, "num_tokens": 48344308.0, "reward": 1.3730634450912476, "reward_std": 0.3464033007621765, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5976893901824951, "rewards/format_reward_step": 0.9609375, "step": 186 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.55846881663113, "calib/avg_num_step_conf": 14.91015625, "calib/ece": 0.44058943089430896, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.991869918699187, "calib/gap": 0.0037559968017056855, "calib/mean_conf": 0.9853048780487804, "calib/mu_c": 0.9870149253731343, "calib/mu_w": 0.9832589285714286, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.44058943089430896, "calib/std_conf": 0.014199813085100618, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2854.0, "completions/max_terminated_length": 2854.0, "completions/mean_length": 816.32421875, "completions/mean_terminated_length": 826.0039672851562, "completions/min_length": 0.0, "completions/min_terminated_length": 299.0, "epoch": 0.19946666666666665, "grad_norm": 0.005040613003075123, "learning_rate": 3.611111111111111e-07, "loss": 0.0234, "num_tokens": 48654831.0, "reward": 1.2702534198760986, "reward_std": 0.47981759905815125, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5366007089614868, "rewards/format_reward_step": 0.95703125, "step": 187 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4861683734539167, "calib/avg_num_step_conf": 13.28515625, "calib/ece": 0.39433734939759035, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.9718875502008032, "calib/gap": -0.0018160659662185274, "calib/mean_conf": 0.9806827309236947, "calib/mu_c": 0.9799315068493151, "calib/mu_w": 0.9817475728155336, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.39433734939759035, "calib/std_conf": 0.021563759548728724, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2649.0, "completions/max_terminated_length": 2649.0, "completions/mean_length": 772.3671875, "completions/mean_terminated_length": 775.3961181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 291.0, "epoch": 0.20053333333333334, "grad_norm": 0.005349774844944477, "learning_rate": 3.3333333333333335e-07, "loss": 0.0388, "num_tokens": 48956629.0, "reward": 1.3448392152786255, "reward_std": 0.33842799067497253, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5803034901618958, "rewards/format_reward_step": 0.96875, "step": 188 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5526046986721144, "calib/avg_num_step_conf": 11.64453125, "calib/ece": 0.3324015748031495, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9960629921259843, "calib/gap": 0.002968335035750891, "calib/mean_conf": 0.9811417322834645, "calib/mu_c": 0.9821818181818182, "calib/mu_w": 0.9792134831460673, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3319685039370078, "calib/std_conf": 0.01633421818188331, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2131.0, "completions/max_terminated_length": 2131.0, "completions/mean_length": 682.56640625, "completions/mean_terminated_length": 685.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 289.0, "epoch": 0.2016, "grad_norm": 0.005616314243525267, "learning_rate": 3.055555555555556e-07, "loss": 0.0066, "num_tokens": 49239134.0, "reward": 1.4676647186279297, "reward_std": 0.37088748812675476, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.6579858660697937, "rewards/format_reward_step": 0.98828125, "step": 189 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.569698703279939, "calib/avg_num_step_conf": 13.71875, "calib/ece": 0.445370634920635, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.9722222222222222, "calib/gap": -0.014109305873379219, "calib/mean_conf": 0.9711769841269843, "calib/mu_c": 0.9647942028985506, "calib/mu_w": 0.9789035087719298, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4344642857142858, "calib/std_conf": 0.09984325943395767, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2784.0, "completions/max_terminated_length": 2784.0, "completions/mean_length": 829.5703125, "completions/mean_terminated_length": 832.8236083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 283.0, "epoch": 0.20266666666666666, "grad_norm": 0.004342732951045036, "learning_rate": 2.7777777777777776e-07, "loss": 0.0434, "num_tokens": 49557112.0, "reward": 1.3048603534698486, "reward_std": 0.31585729122161865, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5472208857536316, "rewards/format_reward_step": 0.984375, "step": 190 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48879999999999996, "calib/avg_num_step_conf": 13.609375, "calib/ece": 0.48379200000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.988, "calib/gap": -0.001024000000000247, "calib/mean_conf": 0.9837920000000001, "calib/mu_c": 0.9832799999999999, "calib/mu_w": 0.9843040000000002, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.48379200000000006, "calib/std_conf": 0.014890155674135848, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2983.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 760.3828125, "completions/mean_terminated_length": 769.3992309570312, "completions/min_length": 0.0, "completions/min_terminated_length": 320.0, "epoch": 0.20373333333333332, "grad_norm": 0.005018308758735657, "learning_rate": 2.5000000000000004e-07, "loss": 0.0128, "num_tokens": 49855938.0, "reward": 1.228130578994751, "reward_std": 0.3813462257385254, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5031362771987915, "rewards/format_reward_step": 0.9765625, "step": 191 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.588122332859175, "calib/avg_num_step_conf": 13.0859375, "calib/ece": 0.36819135802469133, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.9835390946502057, "calib/gap": 0.017762873399715606, "calib/mean_conf": 0.9772448559670782, "calib/mu_c": 0.9841891891891892, "calib/mu_w": 0.9664263157894736, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.36819135802469133, "calib/std_conf": 0.06522785896110676, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2817.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 741.56640625, "completions/mean_terminated_length": 765.4878540039062, "completions/min_length": 0.0, "completions/min_terminated_length": 324.0, "epoch": 0.2048, "grad_norm": 0.0051372176967561245, "learning_rate": 2.2222222222222224e-07, "loss": -0.0055, "num_tokens": 50150755.0, "reward": 1.348089575767517, "reward_std": 0.41244158148765564, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5907105207443237, "rewards/format_reward_step": 0.94140625, "step": 192 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5299048013245033, "calib/avg_num_step_conf": 14.00390625, "calib/ece": 0.36805668016194343, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.9676113360323887, "calib/gap": 0.009059740618101109, "calib/mean_conf": 0.9793927125506071, "calib/mu_c": 0.9829139072847679, "calib/mu_w": 0.9738541666666668, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.36805668016194343, "calib/std_conf": 0.038410151053830825, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2966.0, "completions/max_terminated_length": 2966.0, "completions/mean_length": 757.72265625, "completions/mean_terminated_length": 775.9080200195312, "completions/min_length": 0.0, "completions/min_terminated_length": 405.0, "epoch": 0.20586666666666667, "grad_norm": 0.004883711691945791, "learning_rate": 1.9444444444444447e-07, "loss": -0.019, "num_tokens": 50450444.0, "reward": 1.3639910221099854, "reward_std": 0.5335198640823364, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5990757942199707, "rewards/format_reward_step": 0.94921875, "step": 193 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6402847021779061, "calib/avg_num_step_conf": 12.921875, "calib/ece": 0.38382470119521916, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9402390438247012, "calib/gap": 0.02168525321437953, "calib/mean_conf": 0.9734661354581674, "calib/mu_c": 0.9823648648648647, "calib/mu_w": 0.9606796116504852, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.38382470119521916, "calib/std_conf": 0.0386053926703595, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2622.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 734.34765625, "completions/mean_terminated_length": 737.2274780273438, "completions/min_length": 0.0, "completions/min_terminated_length": 338.0, "epoch": 0.20693333333333333, "grad_norm": 0.004884345922619104, "learning_rate": 1.6666666666666668e-07, "loss": -0.0004, "num_tokens": 50744381.0, "reward": 1.372166395187378, "reward_std": 0.28283581137657166, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6076140403747559, "rewards/format_reward_step": 0.98046875, "step": 194 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.559592822636301, "calib/avg_num_step_conf": 13.25390625, "calib/ece": 0.40950617283950624, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.9670781893004116, "calib/gap": 0.00868322981366454, "calib/mean_conf": 0.9774074074074075, "calib/mu_c": 0.9811594202898551, "calib/mu_w": 0.9724761904761906, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.40950617283950624, "calib/std_conf": 0.028850877176519178, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 791.9609375, "completions/mean_terminated_length": 807.737060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 333.0, "epoch": 0.208, "grad_norm": 0.005530741531401873, "learning_rate": 1.3888888888888888e-07, "loss": -0.0004, "num_tokens": 51053107.0, "reward": 1.2913596630096436, "reward_std": 0.40997639298439026, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5592820048332214, "rewards/format_reward_step": 0.9453125, "step": 195 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5305325987144169, "calib/avg_num_step_conf": 12.5234375, "calib/ece": 0.37438735177865623, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.9881422924901185, "calib/gap": 0.004069264069264222, "calib/mean_conf": 0.9830830039525693, "calib/mu_c": 0.9846753246753248, "calib/mu_w": 0.9806060606060606, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.37438735177865623, "calib/std_conf": 0.019559906653369336, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1653.0, "completions/max_terminated_length": 1653.0, "completions/mean_length": 688.078125, "completions/mean_terminated_length": 690.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 364.0, "epoch": 0.20906666666666668, "grad_norm": 0.004695164505392313, "learning_rate": 1.1111111111111112e-07, "loss": 0.0017, "num_tokens": 51331799.0, "reward": 1.407560110092163, "reward_std": 0.3021622896194458, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.615901529788971, "rewards/format_reward_step": 0.98828125, "step": 196 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5234830418653947, "calib/avg_num_step_conf": 13.2890625, "calib/ece": 0.43194331983805667, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.9838056680161943, "calib/gap": -0.0001126126126125282, "calib/mean_conf": 0.9825506072874494, "calib/mu_c": 0.9825, "calib/mu_w": 0.9826126126126126, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.43194331983805667, "calib/std_conf": 0.018842315781338333, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 721.5546875, "completions/mean_terminated_length": 733.0079956054688, "completions/min_length": 0.0, "completions/min_terminated_length": 362.0, "epoch": 0.21013333333333334, "grad_norm": 0.005680236965417862, "learning_rate": 8.333333333333334e-08, "loss": -0.006, "num_tokens": 51621573.0, "reward": 1.2806005477905273, "reward_std": 0.4925641715526581, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5416699051856995, "rewards/format_reward_step": 0.95703125, "step": 197 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.638334026910806, "calib/avg_num_step_conf": 12.09375, "calib/ece": 0.330996015936255, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.9721115537848606, "calib/gap": 0.019513108614232277, "calib/mean_conf": 0.9764143426294822, "calib/mu_c": 0.9833333333333334, "calib/mu_w": 0.9638202247191011, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.330996015936255, "calib/std_conf": 0.04828422277298219, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2635.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 688.01171875, "completions/mean_terminated_length": 696.1699829101562, "completions/min_length": 0.0, "completions/min_terminated_length": 273.0, "epoch": 0.2112, "grad_norm": 0.005235373042523861, "learning_rate": 5.555555555555556e-08, "loss": 0.0059, "num_tokens": 51903088.0, "reward": 1.450615644454956, "reward_std": 0.3237333297729492, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.6551374793052673, "rewards/format_reward_step": 0.98046875, "step": 198 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5294384057971014, "calib/avg_num_step_conf": 13.5703125, "calib/ece": 0.43600000000000017, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.972, "calib/gap": -0.00765010351966855, "calib/mean_conf": 0.9779200000000001, "calib/mu_c": 0.9744927536231885, "calib/mu_w": 0.9821428571428571, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.4309600000000002, "calib/std_conf": 0.06697815763366444, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 788.546875, "completions/mean_terminated_length": 794.7559204101562, "completions/min_length": 0.0, "completions/min_terminated_length": 310.0, "epoch": 0.21226666666666666, "grad_norm": 0.00531103927642107, "learning_rate": 2.777777777777778e-08, "loss": -0.0174, "num_tokens": 52209156.0, "reward": 1.2944998741149902, "reward_std": 0.4788621962070465, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5421249866485596, "rewards/format_reward_step": 0.96875, "step": 199 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.6175240929705216, "calib/avg_num_step_conf": 12.734375, "calib/ece": 0.36910000000000015, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.9547325102880658, "calib/gap": 0.021978380102040562, "calib/mean_conf": 0.9740382716049383, "calib/mu_c": 0.982721088435374, "calib/mu_w": 0.9607427083333334, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.36910000000000015, "calib/std_conf": 0.0670588137560041, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2834.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 767.05859375, "completions/mean_terminated_length": 785.468017578125, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 0.21333333333333335, "grad_norm": 0.00476312730461359, "learning_rate": 0.0, "loss": -0.0217, "num_tokens": 52513571.0, "reward": 1.3442987203598022, "reward_std": 0.33491945266723633, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.5948475003242493, "rewards/format_reward_step": 0.9453125, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.02728243867168203, "train_runtime": 9826.6555, "train_samples_per_second": 5.21, "train_steps_per_second": 0.02 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 52513571, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }