{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.38076182006817844, "calib/avg_num_step_conf": 5.23046875, "calib/ece": 0.2003187250996017, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2948207171314741, "calib/gap": -0.026059730250481805, "calib/mean_conf": 0.8737051792828686, "calib/mu_c": 0.865606936416185, "calib/mu_w": 0.8916666666666668, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19239043824701207, "calib/std_conf": 0.09027744273295583, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7959393232205367, "calib/step_q_c_n": 857.0, "calib/step_q_gap": -0.006446568895645877, "calib/step_q_w": 0.8023858921161826, "calib/step_q_w_n": 482.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2492.0, "completions/max_terminated_length": 2492.0, "completions/mean_length": 474.94921875, "completions/mean_terminated_length": 478.68896484375, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.0010666666666666667, "grad_norm": 0.0069753071293234825, "kl": 0.000291675329208374, "learning_rate": 2.5000000000000004e-07, "loss": -0.0124, "num_tokens": 229171.0, "reward": 0.326894611120224, "reward_std": 0.18101200461387634, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7142800688743591, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.39095962047576904, "step": 1 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.44343065693430656, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.3349411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2823529411764706, "calib/gap": 0.002352468143016151, "calib/mean_conf": 0.8721960784313726, "calib/mu_c": 0.8732846715328467, "calib/mu_w": 0.8709322033898306, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3349411764705883, "calib/std_conf": 0.07627016470309335, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7954391371340525, "calib/step_q_c_n": 649.0, "calib/step_q_gap": 0.011011892552009073, "calib/step_q_w": 0.7844272445820434, "calib/step_q_w_n": 646.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1966.0, "completions/max_terminated_length": 1966.0, "completions/mean_length": 492.9765625, "completions/mean_terminated_length": 494.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.0021333333333333334, "grad_norm": 0.007095666602253914, "kl": 0.00037539005279541016, "learning_rate": 5.000000000000001e-07, "loss": 0.0052, "num_tokens": 458661.0, "reward": 0.256092369556427, "reward_std": 0.19392633438110352, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6320762038230896, "rewards/format_reward_step": 0.99609375, "rewards/step_l1_reward": -0.4261414408683777, "step": 2 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5330286663896967, "calib/avg_num_step_conf": 5.046875, "calib/ece": 0.22395256916996056, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2648221343873518, "calib/gap": 0.004881595346905021, "calib/mean_conf": 0.87699604743083, "calib/mu_c": 0.8786746987951808, "calib/mu_w": 0.8737931034482758, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22241106719367598, "calib/std_conf": 0.04800020569979932, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.776433203631647, "calib/step_q_c_n": 771.0, "calib/step_q_gap": 0.0018650654358696173, "calib/step_q_w": 0.7745681381957774, "calib/step_q_w_n": 521.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1968.0, "completions/max_terminated_length": 1968.0, "completions/mean_length": 483.37109375, "completions/mean_terminated_length": 487.1771545410156, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.0032, "grad_norm": 0.00795243214815855, "kl": 0.00039833784103393555, "learning_rate": 7.5e-07, "loss": 0.0129, "num_tokens": 687660.0, "reward": 0.32164907455444336, "reward_std": 0.16832180321216583, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.715328574180603, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.39859285950660706, "step": 3 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.479371387283237, "calib/avg_num_step_conf": 5.125, "calib/ece": 0.18932806324110682, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.22529644268774704, "calib/gap": -0.0005520231213871352, "calib/mean_conf": 0.8731225296442688, "calib/mu_c": 0.8729479768786128, "calib/mu_w": 0.8734999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.18932806324110682, "calib/std_conf": 0.047541668911902576, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7937211981566821, "calib/step_q_c_n": 868.0, "calib/step_q_gap": 0.010252729688213558, "calib/step_q_w": 0.7834684684684685, "calib/step_q_w_n": 444.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 523.90625, "completions/mean_terminated_length": 523.90625, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.004266666666666667, "grad_norm": 0.007437299005687237, "kl": 0.00030091404914855957, "learning_rate": 1.0000000000000002e-06, "loss": 0.0375, "num_tokens": 927948.0, "reward": 0.32690131664276123, "reward_std": 0.157812237739563, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7328823804855347, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.411111056804657, "step": 4 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.43493761140819964, "calib/avg_num_step_conf": 4.80859375, "calib/ece": 0.35247011952191243, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.2908366533864542, "calib/gap": -0.007580213903743083, "calib/mean_conf": 0.8783665338645419, "calib/mu_c": 0.8747727272727274, "calib/mu_w": 0.8823529411764705, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.35247011952191243, "calib/std_conf": 0.0467523047937078, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7908806818181818, "calib/step_q_c_n": 704.0, "calib/step_q_gap": 0.0046757482318440236, "calib/step_q_w": 0.7862049335863378, "calib/step_q_w_n": 527.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 521.390625, "completions/mean_terminated_length": 521.390625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.005333333333333333, "grad_norm": 0.007108544930815697, "kl": 0.00028464198112487793, "learning_rate": 1.25e-06, "loss": -0.0318, "num_tokens": 1168112.0, "reward": 0.2227281630039215, "reward_std": 0.178195059299469, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6066081523895264, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.45880815386772156, "step": 5 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47032474804031354, "calib/avg_num_step_conf": 5.12890625, "calib/ece": 0.3324705882352943, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.3254901960784314, "calib/gap": -0.0035386338185892097, "calib/mean_conf": 0.8854117647058822, "calib/mu_c": 0.8838297872340424, "calib/mu_w": 0.8873684210526316, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3324705882352943, "calib/std_conf": 0.03807792653287041, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7958333333333333, "calib/step_q_c_n": 672.0, "calib/step_q_gap": 0.001792771710868335, "calib/step_q_w": 0.794040561622465, "calib/step_q_w_n": 641.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1633.0, "completions/max_terminated_length": 1633.0, "completions/mean_length": 438.1015625, "completions/mean_terminated_length": 438.1015625, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.0064, "grad_norm": 0.0097042853012681, "kl": 0.0008447170257568359, "learning_rate": 1.5e-06, "loss": -0.0065, "num_tokens": 1386218.0, "reward": 0.24446246027946472, "reward_std": 0.16572800278663635, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6327519416809082, "rewards/format_reward_step": 0.9921875, "rewards/step_l1_reward": -0.45242077112197876, "step": 6 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48979800040807997, "calib/avg_num_step_conf": 5.30078125, "calib/ece": 0.21792968750000008, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.30859375, "calib/gap": -0.0032122696048426658, "calib/mean_conf": 0.8773046875, "calib/mu_c": 0.8762130177514792, "calib/mu_w": 0.8794252873563219, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2175390625000001, "calib/std_conf": 0.05623056033445998, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7863606911447084, "calib/step_q_c_n": 926.0, "calib/step_q_gap": -0.01681796314763484, "calib/step_q_w": 0.8031786542923433, "calib/step_q_w_n": 431.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1580.0, "completions/max_terminated_length": 1580.0, "completions/mean_length": 522.12890625, "completions/mean_terminated_length": 524.176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.007466666666666667, "grad_norm": 0.006987900473177433, "kl": 0.00025841593742370605, "learning_rate": 1.75e-06, "loss": 0.0076, "num_tokens": 1627307.0, "reward": 0.3210902214050293, "reward_std": 0.1542452871799469, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.7219562530517578, "rewards/format_reward_step": 0.99609375, "rewards/step_l1_reward": -0.4110258221626282, "step": 7 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.44363992172211353, "calib/avg_num_step_conf": 4.79296875, "calib/ece": 0.2905179282868527, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.23904382470119523, "calib/gap": 0.00294846705805607, "calib/mean_conf": 0.872191235059761, "calib/mu_c": 0.8734246575342466, "calib/mu_w": 0.8704761904761905, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2905179282868527, "calib/std_conf": 0.07471845750082982, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.796625386996904, "calib/step_q_c_n": 646.0, "calib/step_q_gap": 0.03562710816729986, "calib/step_q_w": 0.7609982788296041, "calib/step_q_w_n": 581.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 529.171875, "completions/mean_terminated_length": 529.171875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.008533333333333334, "grad_norm": 0.007049232255667448, "kl": 0.00041344761848449707, "learning_rate": 2.0000000000000003e-06, "loss": -0.0138, "num_tokens": 1869287.0, "reward": 0.28460854291915894, "reward_std": 0.1505751758813858, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6487230062484741, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.3873184621334076, "step": 8 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5248116663211003, "calib/avg_num_step_conf": 4.74609375, "calib/ece": 0.24740000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.236, "calib/gap": 0.0012861980786510463, "calib/mean_conf": 0.8788400000000001, "calib/mu_c": 0.879308176100629, "calib/mu_w": 0.8780219780219779, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.24512000000000003, "calib/std_conf": 0.0420173107183218, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.778821243523316, "calib/step_q_c_n": 772.0, "calib/step_q_gap": 0.010085351875460669, "calib/step_q_w": 0.7687358916478554, "calib/step_q_w_n": 443.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2856.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 494.9921875, "completions/mean_terminated_length": 498.8897705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.0096, "grad_norm": 0.0068976497277617455, "kl": 0.0003219097852706909, "learning_rate": 2.25e-06, "loss": 0.022, "num_tokens": 2103541.0, "reward": 0.2907140254974365, "reward_std": 0.18676680326461792, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.689523458480835, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.4260641038417816, "step": 9 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4660277636271014, "calib/avg_num_step_conf": 5.0078125, "calib/ece": 0.2923921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.34901960784313724, "calib/gap": -0.005308201732042828, "calib/mean_conf": 0.8845490196078432, "calib/mu_c": 0.882384105960265, "calib/mu_w": 0.8876923076923078, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2923921568627451, "calib/std_conf": 0.04479684588226738, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7939179632248938, "calib/step_q_c_n": 707.0, "calib/step_q_gap": 0.013187528442285212, "calib/step_q_w": 0.7807304347826086, "calib/step_q_w_n": 575.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2350.0, "completions/max_terminated_length": 2350.0, "completions/mean_length": 492.23828125, "completions/mean_terminated_length": 492.23828125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.010666666666666666, "grad_norm": 0.006544630043208599, "kl": 0.00033104419708251953, "learning_rate": 2.5e-06, "loss": 0.0179, "num_tokens": 2336354.0, "reward": 0.2788810431957245, "reward_std": 0.18121352791786194, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6639195084571838, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.4217824339866638, "step": 10 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4625872318428535, "calib/avg_num_step_conf": 5.32421875, "calib/ece": 0.31301587301587314, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.23412698412698413, "calib/gap": -0.022662186611527524, "calib/mean_conf": 0.8665873015873017, "calib/mu_c": 0.857054794520548, "calib/mu_w": 0.8797169811320755, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30011904761904773, "calib/std_conf": 0.0959800136140514, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7820560747663551, "calib/step_q_c_n": 749.0, "calib/step_q_gap": 0.030459983561143433, "calib/step_q_w": 0.7515960912052116, "calib/step_q_w_n": 614.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2684.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 514.359375, "completions/mean_terminated_length": 516.3765258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.011733333333333333, "grad_norm": 0.006941006984561682, "kl": 0.0005399882793426514, "learning_rate": 2.7500000000000004e-06, "loss": -0.0012, "num_tokens": 2572510.0, "reward": 0.2535046935081482, "reward_std": 0.17274489998817444, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.643332839012146, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.4472609758377075, "step": 11 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4752100840336134, "calib/avg_num_step_conf": 5.11328125, "calib/ece": 0.2063385826771654, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2755905511811024, "calib/gap": -0.003327731092436781, "calib/mean_conf": 0.8756299212598426, "calib/mu_c": 0.874529411764706, "calib/mu_w": 0.8778571428571428, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.2063385826771654, "calib/std_conf": 0.05151443542808282, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7933858267716535, "calib/step_q_c_n": 762.0, "calib/step_q_gap": 0.01854121982466983, "calib/step_q_w": 0.7748446069469836, "calib/step_q_w_n": 547.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2058.0, "completions/max_terminated_length": 2058.0, "completions/mean_length": 478.34765625, "completions/mean_terminated_length": 478.34765625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.0128, "grad_norm": 0.007310581859201193, "kl": 0.003771156072616577, "learning_rate": 3e-06, "loss": 0.0223, "num_tokens": 2799143.0, "reward": 0.33672308921813965, "reward_std": 0.16366788744926453, "rewards/accuracy_reward_step": 0.6640625, "rewards/final_brier_reward_step": 0.7147250175476074, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.3701850175857544, "step": 12 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5062836021505376, "calib/avg_num_step_conf": 4.53125, "calib/ece": 0.23185770750988133, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.18972332015810275, "calib/gap": 0.0052197580645162445, "calib/mean_conf": 0.8642687747035573, "calib/mu_c": 0.8661875, "calib/mu_w": 0.8609677419354838, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.23185770750988133, "calib/std_conf": 0.05012197467318067, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7799859747545582, "calib/step_q_c_n": 713.0, "calib/step_q_gap": -0.00462252636401006, "calib/step_q_w": 0.7846085011185683, "calib/step_q_w_n": 447.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 476.8046875, "completions/mean_terminated_length": 476.8046875, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.013866666666666666, "grad_norm": 0.007094182539731264, "kl": 0.0008336901664733887, "learning_rate": 3.2500000000000002e-06, "loss": 0.0411, "num_tokens": 3025797.0, "reward": 0.3195507228374481, "reward_std": 0.16193151473999023, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7047258019447327, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.3874993324279785, "step": 13 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.44118594908153397, "calib/avg_num_step_conf": 4.96484375, "calib/ece": 0.3032539682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.26587301587301587, "calib/gap": -0.012412504028359472, "calib/mean_conf": 0.877063492063492, "calib/mu_c": 0.8717931034482759, "calib/mu_w": 0.8842056074766353, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.30246031746031743, "calib/std_conf": 0.046531563964134603, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7948476052249636, "calib/step_q_c_n": 689.0, "calib/step_q_gap": 0.03369640247582262, "calib/step_q_w": 0.7611512027491409, "calib/step_q_w_n": 582.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 527.546875, "completions/mean_terminated_length": 531.7008056640625, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.014933333333333333, "grad_norm": 0.008941327221691608, "kl": 0.01397097110748291, "learning_rate": 3.5e-06, "loss": 0.0107, "num_tokens": 3266249.0, "reward": 0.2711637616157532, "reward_std": 0.18728607892990112, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6459636688232422, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.4130111634731293, "step": 14 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5223948220064725, "calib/avg_num_step_conf": 4.3515625, "calib/ece": 0.2808300395256915, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.25691699604743085, "calib/gap": 0.0034103559870551914, "calib/mean_conf": 0.8734782608695653, "calib/mu_c": 0.8748666666666668, "calib/mu_w": 0.8714563106796116, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28071146245059275, "calib/std_conf": 0.05053622073781924, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7761708860759493, "calib/step_q_c_n": 632.0, "calib/step_q_gap": -0.018082225957245734, "calib/step_q_w": 0.794253112033195, "calib/step_q_w_n": 482.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 457.26171875, "completions/mean_terminated_length": 457.26171875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.016, "grad_norm": 0.007930455729365349, "kl": 0.0013884305953979492, "learning_rate": 3.7500000000000005e-06, "loss": 0.004, "num_tokens": 3491188.0, "reward": 0.2793067693710327, "reward_std": 0.16331849992275238, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6710308790206909, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.427261084318161, "step": 15 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47225090345895715, "calib/avg_num_step_conf": 5.11328125, "calib/ece": 0.27375494071146245, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2490118577075099, "calib/gap": 0.0075025813113061, "calib/mean_conf": 0.862687747035573, "calib/mu_c": 0.8657718120805369, "calib/mu_w": 0.8582692307692308, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.27375494071146245, "calib/std_conf": 0.08306795951084686, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7630601792573624, "calib/step_q_c_n": 781.0, "calib/step_q_gap": 0.005200330772513917, "calib/step_q_w": 0.7578598484848484, "calib/step_q_w_n": 528.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2105.0, "completions/max_terminated_length": 2105.0, "completions/mean_length": 607.16015625, "completions/mean_terminated_length": 609.5411987304688, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.017066666666666667, "grad_norm": 0.006173198111355305, "kl": 0.0014122724533081055, "learning_rate": 4.000000000000001e-06, "loss": 0.0132, "num_tokens": 3755469.0, "reward": 0.28060784935951233, "reward_std": 0.15960822999477386, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.66645348072052, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.41773784160614014, "step": 16 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5471626733921816, "calib/avg_num_step_conf": 4.421875, "calib/ece": 0.15024193548387096, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.1774193548387097, "calib/gap": -0.0007297183690626596, "calib/mean_conf": 0.855, "calib/mu_c": 0.8548087431693989, "calib/mu_w": 0.8555384615384616, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.13366935483870968, "calib/std_conf": 0.06292596776653371, "calib/step_conf_rate": 0.94921875, "calib/step_q_c": 0.7674694376528117, "calib/step_q_c_n": 818.0, "calib/step_q_gap": 0.030176444022238402, "calib/step_q_w": 0.7372929936305733, "calib/step_q_w_n": 314.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 511.23046875, "completions/mean_terminated_length": 517.29248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.018133333333333335, "grad_norm": 0.007242945488542318, "kl": 0.0038924217224121094, "learning_rate": 4.25e-06, "loss": 0.0083, "num_tokens": 3989872.0, "reward": 0.3711223602294922, "reward_std": 0.17457322776317596, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.74609375, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.3358803391456604, "step": 17 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4884920634920635, "calib/avg_num_step_conf": 3.5625, "calib/ece": 0.31554655870445353, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.16194331983805668, "calib/gap": 0.00042129629629628074, "calib/mean_conf": 0.8621052631578948, "calib/mu_c": 0.8622962962962963, "calib/mu_w": 0.8618750000000001, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.31554655870445353, "calib/std_conf": 0.051389804403117426, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.7643207126948777, "calib/step_q_c_n": 449.0, "calib/step_q_gap": 0.0965669329972535, "calib/step_q_w": 0.6677537796976242, "calib/step_q_w_n": 463.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2532.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 487.09375, "completions/mean_terminated_length": 492.8695983886719, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.0192, "grad_norm": 0.007715197745710611, "kl": 0.0037174224853515625, "learning_rate": 4.5e-06, "loss": 0.0471, "num_tokens": 4225288.0, "reward": 0.24096588790416718, "reward_std": 0.15334269404411316, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6134636998176575, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.4260631203651428, "step": 18 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4771100164203613, "calib/avg_num_step_conf": 3.140625, "calib/ece": 0.28051999999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.18, "calib/gap": 0.011359605911330006, "calib/mean_conf": 0.8551599999999999, "calib/mu_c": 0.8599310344827586, "calib/mu_w": 0.8485714285714286, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.2778399999999999, "calib/std_conf": 0.09371112207203582, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.7566736842105263, "calib/step_q_c_n": 475.0, "calib/step_q_gap": 0.01700803071508561, "calib/step_q_w": 0.7396656534954407, "calib/step_q_w_n": 329.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2683.0, "completions/max_terminated_length": 2683.0, "completions/mean_length": 458.39453125, "completions/mean_terminated_length": 460.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.020266666666666665, "grad_norm": 0.007385259959846735, "kl": 0.0075931549072265625, "learning_rate": 4.75e-06, "loss": 0.0268, "num_tokens": 4447397.0, "reward": 0.279776006937027, "reward_std": 0.19024288654327393, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6482691764831543, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.39340460300445557, "step": 19 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.43082368082368083, "calib/avg_num_step_conf": 3.58984375, "calib/ece": 0.26715415019762856, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.16600790513833993, "calib/gap": 0.0013693693693692666, "calib/mean_conf": 0.8521343873517787, "calib/mu_c": 0.8527027027027028, "calib/mu_w": 0.8513333333333335, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.26715415019762856, "calib/std_conf": 0.07074553382822461, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7535021097046413, "calib/step_q_c_n": 474.0, "calib/step_q_gap": 0.04516503105295577, "calib/step_q_w": 0.7083370786516855, "calib/step_q_w_n": 445.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 434.2578125, "completions/mean_terminated_length": 434.2578125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.021333333333333333, "grad_norm": 0.007737329229712486, "kl": 0.00714874267578125, "learning_rate": 5e-06, "loss": -0.006, "num_tokens": 4663439.0, "reward": 0.29337871074676514, "reward_std": 0.14856009185314178, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6688362956047058, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.3937976360321045, "step": 20 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.506896551724138, "calib/avg_num_step_conf": 3.28125, "calib/ece": 0.19932539682539668, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.1626984126984127, "calib/gap": 0.0032706374085688106, "calib/mean_conf": 0.8509920634920635, "calib/mu_c": 0.8521212121212123, "calib/mu_w": 0.8488505747126435, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.19777777777777764, "calib/std_conf": 0.05771619068485525, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7341996233521657, "calib/step_q_c_n": 531.0, "calib/step_q_gap": 0.001448814290677003, "calib/step_q_w": 0.7327508090614887, "calib/step_q_w_n": 309.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2639.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 456.33203125, "completions/mean_terminated_length": 456.33203125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.0224, "grad_norm": 0.0093642957508564, "kl": 0.015064239501953125, "learning_rate": 4.9722222222222224e-06, "loss": 0.0232, "num_tokens": 4883220.0, "reward": 0.3449612259864807, "reward_std": 0.19086401164531708, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7126258015632629, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.3453596234321594, "step": 21 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5431784943241172, "calib/avg_num_step_conf": 3.0234375, "calib/ece": 0.17089843749999994, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.11328125, "calib/gap": 0.012915244794205805, "calib/mean_conf": 0.8466796875, "calib/mu_c": 0.8508670520231213, "calib/mu_w": 0.8379518072289155, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17089843749999994, "calib/std_conf": 0.0530285055409102, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7707037037037037, "calib/step_q_c_n": 540.0, "calib/step_q_gap": 0.04271225071225071, "calib/step_q_w": 0.727991452991453, "calib/step_q_w_n": 234.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 926.0, "completions/max_terminated_length": 926.0, "completions/mean_length": 413.66796875, "completions/mean_terminated_length": 415.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.023466666666666667, "grad_norm": 0.008529752492904663, "kl": 0.0120086669921875, "learning_rate": 4.944444444444445e-06, "loss": -0.0041, "num_tokens": 5090935.0, "reward": 0.3591231405735016, "reward_std": 0.14010533690452576, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.754540205001831, "rewards/format_reward_step": 1.0, "rewards/step_l1_reward": -0.37145018577575684, "step": 22 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5214104108122575, "calib/avg_num_step_conf": 2.8984375, "calib/ece": 0.2740485829959515, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.10121457489878542, "calib/gap": 0.0029365716579685675, "calib/mean_conf": 0.8438461538461539, "calib/mu_c": 0.8451063829787234, "calib/mu_w": 0.8421698113207549, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.27352226720647776, "calib/std_conf": 0.056311961962853735, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.7343103448275862, "calib/step_q_c_n": 406.0, "calib/step_q_gap": 0.006215106732348152, "calib/step_q_w": 0.728095238095238, "calib/step_q_w_n": 336.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2372.0, "completions/max_terminated_length": 2372.0, "completions/mean_length": 472.8125, "completions/mean_terminated_length": 476.5354309082031, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.024533333333333334, "grad_norm": 0.0072917933575809, "kl": 0.01273345947265625, "learning_rate": 4.9166666666666665e-06, "loss": -0.0068, "num_tokens": 5315911.0, "reward": 0.26862412691116333, "reward_std": 0.19663289189338684, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.631864070892334, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.3930532932281494, "step": 23 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5548489131166296, "calib/avg_num_step_conf": 2.79296875, "calib/ece": 0.31856557377049183, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.12704918032786885, "calib/gap": 0.01096641765933104, "calib/mean_conf": 0.8314344262295084, "calib/mu_c": 0.8366929133858269, "calib/mu_w": 0.8257264957264958, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.31475409836065577, "calib/std_conf": 0.07833298906265776, "calib/step_conf_rate": 0.94921875, "calib/step_q_c": 0.7263687150837987, "calib/step_q_c_n": 358.0, "calib/step_q_gap": 0.0031194153639106936, "calib/step_q_w": 0.723249299719888, "calib/step_q_w_n": 357.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2725.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 507.5859375, "completions/mean_terminated_length": 519.7680053710938, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.0256, "grad_norm": 0.0077531770803034306, "kl": 0.011515617370605469, "learning_rate": 4.888888888888889e-06, "loss": 0.0301, "num_tokens": 5550365.0, "reward": 0.25186437368392944, "reward_std": 0.2193448543548584, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6128253936767578, "rewards/format_reward_step": 0.9375, "rewards/step_l1_reward": -0.395815372467041, "step": 24 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.47339075854700857, "calib/avg_num_step_conf": 3.046875, "calib/ece": 0.23178571428571426, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.14285714285714285, "calib/gap": -0.0008333333333332416, "calib/mean_conf": 0.8478174603174603, "calib/mu_c": 0.8475, "calib/mu_w": 0.8483333333333333, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.23027777777777775, "calib/std_conf": 0.06529237063693799, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7341647058823529, "calib/step_q_c_n": 425.0, "calib/step_q_gap": 0.009347804473902266, "calib/step_q_w": 0.7248169014084507, "calib/step_q_w_n": 355.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2761.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 452.5703125, "completions/mean_terminated_length": 452.5703125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 0.02666666666666667, "grad_norm": 0.008476924151182175, "kl": 0.014321327209472656, "learning_rate": 4.861111111111111e-06, "loss": 0.0502, "num_tokens": 5769447.0, "reward": 0.3191983699798584, "reward_std": 0.16112613677978516, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6910125017166138, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.36902207136154175, "step": 25 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5351290322580645, "calib/avg_num_step_conf": 2.77734375, "calib/ece": 0.23690196078431378, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.11764705882352941, "calib/gap": 0.013729032258064544, "calib/mean_conf": 0.8447450980392157, "calib/mu_c": 0.8501290322580646, "calib/mu_w": 0.8364, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23690196078431378, "calib/std_conf": 0.05971692804874195, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.7648410757946209, "calib/step_q_c_n": 409.0, "calib/step_q_gap": 0.0505033274502501, "calib/step_q_w": 0.7143377483443708, "calib/step_q_w_n": 302.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1222.0, "completions/max_terminated_length": 1222.0, "completions/mean_length": 433.0078125, "completions/mean_terminated_length": 434.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.027733333333333332, "grad_norm": 0.07723195105791092, "kl": 0.06496047973632812, "learning_rate": 4.833333333333333e-06, "loss": 0.0067, "num_tokens": 5985537.0, "reward": 0.3232446312904358, "reward_std": 0.1632021963596344, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7019386291503906, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.37419939041137695, "step": 26 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5066225165562914, "calib/avg_num_step_conf": 3.1015625, "calib/ece": 0.24322834645669295, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.07874015748031496, "calib/gap": -0.0005748087185750705, "calib/mean_conf": 0.8377165354330709, "calib/mu_c": 0.8374834437086093, "calib/mu_w": 0.8380582524271843, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.24322834645669295, "calib/std_conf": 0.05801423903925538, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7444008264462809, "calib/step_q_c_n": 484.0, "calib/step_q_gap": 0.03998147160757126, "calib/step_q_w": 0.7044193548387097, "calib/step_q_w_n": 310.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2068.0, "completions/max_terminated_length": 2068.0, "completions/mean_length": 468.66015625, "completions/mean_terminated_length": 468.66015625, "completions/min_length": 198.0, "completions/min_terminated_length": 198.0, "epoch": 0.0288, "grad_norm": 0.006958463229238987, "kl": 0.013710975646972656, "learning_rate": 4.805555555555556e-06, "loss": 0.0335, "num_tokens": 6210730.0, "reward": 0.301924467086792, "reward_std": 0.2039233148097992, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6822699308395386, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.3924834728240967, "step": 27 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5399909111565553, "calib/avg_num_step_conf": 2.33984375, "calib/ece": 0.18122950819672146, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.10245901639344263, "calib/gap": 0.010517306672726212, "calib/mean_conf": 0.842704918032787, "calib/mu_c": 0.846196319018405, "calib/mu_w": 0.8356790123456788, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.1779508196721313, "calib/std_conf": 0.07354085387679224, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.7491463414634147, "calib/step_q_c_n": 410.0, "calib/step_q_gap": 0.016924119241192304, "calib/step_q_w": 0.7322222222222224, "calib/step_q_w_n": 189.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2981.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 521.23046875, "completions/mean_terminated_length": 521.23046875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.029866666666666666, "grad_norm": 0.006662163883447647, "kl": 0.009778022766113281, "learning_rate": 4.777777777777778e-06, "loss": 0.0248, "num_tokens": 6451109.0, "reward": 0.3436114192008972, "reward_std": 0.1427958607673645, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.6973953247070312, "rewards/format_reward_step": 0.9296875, "rewards/step_l1_reward": -0.32345378398895264, "step": 28 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5013983739837398, "calib/avg_num_step_conf": 3.16015625, "calib/ece": 0.35213709677419347, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.1532258064516129, "calib/gap": 0.0003726829268290155, "calib/mean_conf": 0.8481048387096773, "calib/mu_c": 0.8482926829268291, "calib/mu_w": 0.8479200000000001, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.35213709677419347, "calib/std_conf": 0.06029528395247401, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.6996764705882352, "calib/step_q_c_n": 340.0, "calib/step_q_gap": 0.00660610811488771, "calib/step_q_w": 0.6930703624733475, "calib/step_q_w_n": 469.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3071.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 540.2890625, "completions/mean_terminated_length": 542.4078979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.030933333333333334, "grad_norm": 0.006647651549428701, "kl": 0.011612892150878906, "learning_rate": 4.75e-06, "loss": 0.0118, "num_tokens": 6696551.0, "reward": 0.25028783082962036, "reward_std": 0.2021082192659378, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5928003787994385, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.3789435029029846, "step": 29 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.46759906759906755, "calib/avg_num_step_conf": 3.375, "calib/ece": 0.2636693548387095, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.10483870967741936, "calib/gap": 0.010287712287712414, "calib/mean_conf": 0.8322177419354838, "calib/mu_c": 0.8365734265734266, "calib/mu_w": 0.8262857142857142, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.2596370967741934, "calib/std_conf": 0.10654925110497106, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7221413721413722, "calib/step_q_c_n": 481.0, "calib/step_q_gap": 0.05825103271578458, "calib/step_q_w": 0.6638903394255876, "calib/step_q_w_n": 383.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 562.6953125, "completions/mean_terminated_length": 569.3676147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.032, "grad_norm": 0.006563671864569187, "kl": 0.010916709899902344, "learning_rate": 4.722222222222222e-06, "loss": 0.0258, "num_tokens": 6947585.0, "reward": 0.28651654720306396, "reward_std": 0.19402888417243958, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6579402685165405, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.3888133466243744, "step": 30 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5641335752200497, "calib/avg_num_step_conf": 3.38671875, "calib/ece": 0.3468852459016394, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.12295081967213115, "calib/gap": 0.020538869851508545, "calib/mean_conf": 0.8427868852459017, "calib/mu_c": 0.8531404958677686, "calib/mu_w": 0.8326016260162601, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.3468852459016394, "calib/std_conf": 0.07599894622955784, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.6967750000000001, "calib/step_q_c_n": 400.0, "calib/step_q_gap": 0.028595128479657506, "calib/step_q_w": 0.6681798715203426, "calib/step_q_w_n": 467.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2868.0, "completions/max_terminated_length": 2868.0, "completions/mean_length": 563.71875, "completions/mean_terminated_length": 574.9482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.03306666666666667, "grad_norm": 0.006196488626301289, "kl": 0.012213706970214844, "learning_rate": 4.694444444444445e-06, "loss": 0.0209, "num_tokens": 7197809.0, "reward": 0.23471850156784058, "reward_std": 0.1854703426361084, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.59673011302948, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.41088682413101196, "step": 31 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5799528001020539, "calib/avg_num_step_conf": 2.91796875, "calib/ece": 0.30733067729083663, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.11155378486055777, "calib/gap": 0.028174512055109346, "calib/mean_conf": 0.8411952191235059, "calib/mu_c": 0.8543283582089554, "calib/mu_w": 0.826153846153846, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.30733067729083663, "calib/std_conf": 0.08186670329637882, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7777472527472528, "calib/step_q_c_n": 364.0, "calib/step_q_gap": 0.07980991593263154, "calib/step_q_w": 0.6979373368146212, "calib/step_q_w_n": 383.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2818.0, "completions/max_terminated_length": 2818.0, "completions/mean_length": 516.00390625, "completions/mean_terminated_length": 524.1944580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.034133333333333335, "grad_norm": 0.007397293113172054, "kl": 0.017431259155273438, "learning_rate": 4.666666666666667e-06, "loss": 0.0133, "num_tokens": 7436610.0, "reward": 0.2804139256477356, "reward_std": 0.17597326636314392, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6451066732406616, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.38271623849868774, "step": 32 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47163318452380953, "calib/avg_num_step_conf": 3.24609375, "calib/ece": 0.3450393700787403, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1220472440944882, "calib/gap": -0.005338541666666585, "calib/mean_conf": 0.8489763779527559, "calib/mu_c": 0.846328125, "calib/mu_w": 0.8516666666666666, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.3450393700787403, "calib/std_conf": 0.05202716803413796, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7498271604938274, "calib/step_q_c_n": 405.0, "calib/step_q_gap": 0.0334421839680058, "calib/step_q_w": 0.7163849765258216, "calib/step_q_w_n": 426.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1187.0, "completions/max_terminated_length": 1187.0, "completions/mean_length": 493.765625, "completions/mean_terminated_length": 497.6535339355469, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.0352, "grad_norm": 0.006877983920276165, "kl": 0.01480865478515625, "learning_rate": 4.638888888888889e-06, "loss": 0.0006, "num_tokens": 7669886.0, "reward": 0.2519468367099762, "reward_std": 0.20320287346839905, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.619293749332428, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.41305631399154663, "step": 33 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5368427868427869, "calib/avg_num_step_conf": 3.515625, "calib/ece": 0.2832270916334661, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.11553784860557768, "calib/gap": 0.014276094276094331, "calib/mean_conf": 0.8529482071713148, "calib/mu_c": 0.8590909090909091, "calib/mu_w": 0.8448148148148148, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2832270916334661, "calib/std_conf": 0.057657400249739615, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7596969696969698, "calib/step_q_c_n": 462.0, "calib/step_q_gap": 0.06031340805313423, "calib/step_q_w": 0.6993835616438355, "calib/step_q_w_n": 438.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2568.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 483.65234375, "completions/mean_terminated_length": 487.46063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.03626666666666667, "grad_norm": 0.07357311248779297, "kl": 0.1785106658935547, "learning_rate": 4.611111111111112e-06, "loss": 0.0134, "num_tokens": 7898813.0, "reward": 0.3066682517528534, "reward_std": 0.2058257907629013, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6612108945846558, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.35490572452545166, "step": 34 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5123316197045618, "calib/avg_num_step_conf": 2.67578125, "calib/ece": 0.32262096774193544, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.07661290322580645, "calib/gap": 0.01582937463395584, "calib/mean_conf": 0.8347177419354839, "calib/mu_c": 0.8424409448818898, "calib/mu_w": 0.826611570247934, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.32262096774193544, "calib/std_conf": 0.07561522509302532, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.7379228486646885, "calib/step_q_c_n": 337.0, "calib/step_q_gap": 0.03481940038882636, "calib/step_q_w": 0.7031034482758621, "calib/step_q_w_n": 348.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2340.0, "completions/max_terminated_length": 2340.0, "completions/mean_length": 510.4453125, "completions/mean_terminated_length": 516.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.037333333333333336, "grad_norm": 0.006531782913953066, "kl": 0.016582489013671875, "learning_rate": 4.583333333333333e-06, "loss": 0.0256, "num_tokens": 8138743.0, "reward": 0.24612952768802643, "reward_std": 0.2067875862121582, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6203484535217285, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.42027685046195984, "step": 35 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6350965250965253, "calib/avg_num_step_conf": 3.0703125, "calib/ece": 0.12921568627450977, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.13333333333333333, "calib/gap": 0.03343629343629351, "calib/mean_conf": 0.8496862745098038, "calib/mu_c": 0.8588648648648648, "calib/mu_w": 0.8254285714285713, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12670588235294114, "calib/std_conf": 0.07523492362712644, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.7731095406360424, "calib/step_q_c_n": 566.0, "calib/step_q_gap": 0.06288226790876972, "calib/step_q_w": 0.7102272727272727, "calib/step_q_w_n": 220.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2080.0, "completions/max_terminated_length": 2080.0, "completions/mean_length": 478.0546875, "completions/mean_terminated_length": 479.929443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.0384, "grad_norm": 0.007612496614456177, "kl": 0.021648406982421875, "learning_rate": 4.555555555555556e-06, "loss": 0.025, "num_tokens": 8363837.0, "reward": 0.41307783126831055, "reward_std": 0.17080523073673248, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.787934422492981, "rewards/format_reward_step": 0.9921875, "rewards/step_l1_reward": -0.30474740266799927, "step": 36 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6120358090185677, "calib/avg_num_step_conf": 3.1328125, "calib/ece": 0.36178861788617883, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.08536585365853659, "calib/gap": 0.0403474801061009, "calib/mean_conf": 0.8333333333333334, "calib/mu_c": 0.8546551724137932, "calib/mu_w": 0.8143076923076923, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.36178861788617883, "calib/std_conf": 0.10269274296655329, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.78, "calib/step_q_c_n": 342.0, "calib/step_q_gap": 0.17943478260869572, "calib/step_q_w": 0.6005652173913043, "calib/step_q_w_n": 460.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 560.125, "completions/mean_terminated_length": 562.3215942382812, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.039466666666666664, "grad_norm": 0.006658955942839384, "kl": 0.017736434936523438, "learning_rate": 4.527777777777778e-06, "loss": 0.0064, "num_tokens": 8614325.0, "reward": 0.25001418590545654, "reward_std": 0.1737680435180664, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6048921942710876, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.3876763582229614, "step": 37 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5477272727272727, "calib/avg_num_step_conf": 2.8203125, "calib/ece": 0.29455645161290317, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.1693548387096774, "calib/gap": 0.019081686429512468, "calib/mean_conf": 0.8390725806451613, "calib/mu_c": 0.847536231884058, "calib/mu_w": 0.8284545454545456, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2885887096774193, "calib/std_conf": 0.11037850373550395, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7636613272311213, "calib/step_q_c_n": 437.0, "calib/step_q_gap": 0.054924485125858125, "calib/step_q_w": 0.7087368421052632, "calib/step_q_w_n": 285.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 496.72265625, "completions/mean_terminated_length": 502.6126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.04053333333333333, "grad_norm": 0.007322958204895258, "kl": 0.028581619262695312, "learning_rate": 4.5e-06, "loss": 0.0158, "num_tokens": 8848374.0, "reward": 0.2834533452987671, "reward_std": 0.18513981997966766, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6495933532714844, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.3850303888320923, "step": 38 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6027422990232908, "calib/avg_num_step_conf": 2.62109375, "calib/ece": 0.36640316205533596, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.19367588932806323, "calib/gap": 0.04174931129476589, "calib/mean_conf": 0.8392094861660079, "calib/mu_c": 0.86099173553719, "calib/mu_w": 0.8192424242424241, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3636758893280632, "calib/std_conf": 0.12069695329426747, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7932542372881356, "calib/step_q_c_n": 295.0, "calib/step_q_gap": 0.07586062026685902, "calib/step_q_w": 0.7173936170212766, "calib/step_q_w_n": 376.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1922.0, "completions/max_terminated_length": 1922.0, "completions/mean_length": 501.16015625, "completions/mean_terminated_length": 501.16015625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.0416, "grad_norm": 0.006672346033155918, "kl": 0.020376205444335938, "learning_rate": 4.472222222222223e-06, "loss": 0.0053, "num_tokens": 9082759.0, "reward": 0.2530670166015625, "reward_std": 0.17842620611190796, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6115871071815491, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.39607805013656616, "step": 39 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5051023622047244, "calib/avg_num_step_conf": 2.37109375, "calib/ece": 0.3546031746031745, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.15476190476190477, "calib/gap": 0.014383622047244082, "calib/mean_conf": 0.8461111111111111, "calib/mu_c": 0.85336, "calib/mu_w": 0.8389763779527559, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3523412698412697, "calib/std_conf": 0.08938921690430807, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.76053125, "calib/step_q_c_n": 320.0, "calib/step_q_gap": -0.016855509581881534, "calib/step_q_w": 0.7773867595818815, "calib/step_q_w_n": 287.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2447.0, "completions/max_terminated_length": 2447.0, "completions/mean_length": 512.81640625, "completions/mean_terminated_length": 514.8274536132812, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.042666666666666665, "grad_norm": 0.006264918949455023, "kl": 0.024440765380859375, "learning_rate": 4.444444444444444e-06, "loss": -0.0076, "num_tokens": 9320800.0, "reward": 0.23327161371707916, "reward_std": 0.20680826902389526, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6144461035728455, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.44087162613868713, "step": 40 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4858774267224972, "calib/avg_num_step_conf": 2.4375, "calib/ece": 0.14285156249999992, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.27734375, "calib/gap": 0.027073467834031195, "calib/mean_conf": 0.8530859375, "calib/mu_c": 0.8605945945945945, "calib/mu_w": 0.8335211267605633, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.1366406249999999, "calib/std_conf": 0.11130374596008032, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.7796583143507972, "calib/step_q_c_n": 439.0, "calib/step_q_gap": 0.07452317921566198, "calib/step_q_w": 0.7051351351351353, "calib/step_q_w_n": 185.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 444.66796875, "completions/mean_terminated_length": 446.41180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.04373333333333333, "grad_norm": 0.006443468388170004, "kl": 0.027767181396484375, "learning_rate": 4.416666666666667e-06, "loss": 0.0064, "num_tokens": 9541883.0, "reward": 0.4025619328022003, "reward_std": 0.21940746903419495, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.7732605338096619, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.3095429241657257, "step": 41 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5870038412291932, "calib/avg_num_step_conf": 2.6875, "calib/ece": 0.30221428571428577, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2222222222222222, "calib/gap": 0.020968245838668298, "calib/mean_conf": 0.8657063492063491, "calib/mu_c": 0.8748591549295774, "calib/mu_w": 0.8538909090909091, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.30221428571428577, "calib/std_conf": 0.06530873777320191, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7882065217391303, "calib/step_q_c_n": 368.0, "calib/step_q_gap": 0.03461902173913034, "calib/step_q_w": 0.7535875, "calib/step_q_w_n": 320.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1991.0, "completions/max_terminated_length": 1991.0, "completions/mean_length": 417.30078125, "completions/mean_terminated_length": 420.58660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.0448, "grad_norm": 0.006725949700921774, "kl": 0.030731201171875, "learning_rate": 4.388888888888889e-06, "loss": 0.0069, "num_tokens": 9753080.0, "reward": 0.2923615574836731, "reward_std": 0.1730237454175949, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6582983732223511, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.3813877999782562, "step": 42 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6098401143897049, "calib/avg_num_step_conf": 2.5, "calib/ece": 0.22211764705882345, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.20784313725490197, "calib/gap": 0.05029702326790575, "calib/mean_conf": 0.8378039215686275, "calib/mu_c": 0.8571337579617834, "calib/mu_w": 0.8068367346938776, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22211764705882345, "calib/std_conf": 0.11507150711330402, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.7522802850356294, "calib/step_q_c_n": 421.0, "calib/step_q_gap": -0.003747112224644522, "calib/step_q_w": 0.7560273972602739, "calib/step_q_w_n": 219.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1255.0, "completions/max_terminated_length": 1255.0, "completions/mean_length": 475.69140625, "completions/mean_terminated_length": 477.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.04586666666666667, "grad_norm": 0.006733867339789867, "kl": 0.025842666625976562, "learning_rate": 4.361111111111112e-06, "loss": 0.0272, "num_tokens": 9980081.0, "reward": 0.3314400315284729, "reward_std": 0.22022783756256104, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7135285139083862, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.370179682970047, "step": 43 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5617777777777777, "calib/avg_num_step_conf": 2.71484375, "calib/ece": 0.36709163346613544, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.2788844621513944, "calib/gap": 0.026170793650793667, "calib/mean_conf": 0.855617529880478, "calib/mu_c": 0.8686507936507937, "calib/mu_w": 0.84248, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3603585657370518, "calib/std_conf": 0.11360410889243894, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.7515193370165746, "calib/step_q_c_n": 362.0, "calib/step_q_gap": 0.06743525293249042, "calib/step_q_w": 0.6840840840840842, "calib/step_q_w_n": 333.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 513.328125, "completions/mean_terminated_length": 519.4150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.046933333333333334, "grad_norm": 0.006369642447680235, "kl": 0.026624679565429688, "learning_rate": 4.333333333333334e-06, "loss": -0.0139, "num_tokens": 10217813.0, "reward": 0.24526745080947876, "reward_std": 0.20343050360679626, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6108214855194092, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.4140365719795227, "step": 44 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5850052110474205, "calib/avg_num_step_conf": 2.69921875, "calib/ece": 0.23276679841897235, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.19367588932806323, "calib/gap": 0.08407959874934867, "calib/mean_conf": 0.8303162055335969, "calib/mu_c": 0.8638815789473684, "calib/mu_w": 0.7798019801980197, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.23114624505928857, "calib/std_conf": 0.16293715541536571, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8120765027322405, "calib/step_q_c_n": 366.0, "calib/step_q_gap": 0.13967650273224053, "calib/step_q_w": 0.6724, "calib/step_q_w_n": 325.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1983.0, "completions/max_terminated_length": 1983.0, "completions/mean_length": 484.37109375, "completions/mean_terminated_length": 486.2706298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.048, "grad_norm": 0.006604231894016266, "kl": 0.028827667236328125, "learning_rate": 4.305555555555556e-06, "loss": 0.0122, "num_tokens": 10446860.0, "reward": 0.3432024121284485, "reward_std": 0.18877151608467102, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7128074169158936, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.3428088426589966, "step": 45 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.622137661637931, "calib/avg_num_step_conf": 2.48828125, "calib/ece": 0.3403688524590163, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.27049180327868855, "calib/gap": 0.02542025862068953, "calib/mean_conf": 0.8513524590163934, "calib/mu_c": 0.8634375, "calib/mu_w": 0.8380172413793104, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.33356557377049173, "calib/std_conf": 0.11088418283123143, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.7774074074074074, "calib/step_q_c_n": 324.0, "calib/step_q_gap": 0.04565021890900489, "calib/step_q_w": 0.7317571884984025, "calib/step_q_w_n": 313.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2891.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 520.828125, "completions/mean_terminated_length": 524.9291381835938, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.04906666666666667, "grad_norm": 0.006163495592772961, "kl": 0.029521942138671875, "learning_rate": 4.277777777777778e-06, "loss": -0.0366, "num_tokens": 10684960.0, "reward": 0.25145965814590454, "reward_std": 0.20970991253852844, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6090492606163025, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.39519235491752625, "step": 46 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5917415577342048, "calib/avg_num_step_conf": 2.20703125, "calib/ece": 0.24120481927710852, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.3132530120481928, "calib/gap": 0.0656372549019607, "calib/mean_conf": 0.8511646586345382, "calib/mu_c": 0.8764705882352941, "calib/mu_w": 0.8108333333333334, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.23895582329317278, "calib/std_conf": 0.13462072632327893, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.7662576687116565, "calib/step_q_c_n": 326.0, "calib/step_q_gap": 0.09291038837692855, "calib/step_q_w": 0.6733472803347279, "calib/step_q_w_n": 239.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 505.50390625, "completions/mean_terminated_length": 505.50390625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.050133333333333335, "grad_norm": 0.006543243769556284, "kl": 0.030582427978515625, "learning_rate": 4.25e-06, "loss": 0.0041, "num_tokens": 10920345.0, "reward": 0.3438766896724701, "reward_std": 0.15386797487735748, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6899590492248535, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.3115805983543396, "step": 47 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5662878787878788, "calib/avg_num_step_conf": 2.53125, "calib/ece": 0.39715999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.292, "calib/gap": 0.007679763739085632, "calib/mean_conf": 0.84764, "calib/mu_c": 0.8516949152542371, "calib/mu_w": 0.8440151515151515, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.38639999999999997, "calib/std_conf": 0.12591914230965837, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7769932432432433, "calib/step_q_c_n": 296.0, "calib/step_q_gap": 0.057533015970515855, "calib/step_q_w": 0.7194602272727274, "calib/step_q_w_n": 352.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 480.625, "completions/mean_terminated_length": 484.4094543457031, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.0512, "grad_norm": 0.007595698349177837, "kl": 0.03356170654296875, "learning_rate": 4.222222222222223e-06, "loss": -0.0104, "num_tokens": 11147073.0, "reward": 0.22138309478759766, "reward_std": 0.23210257291793823, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5816078186035156, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.4239978492259979, "step": 48 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5904684450227716, "calib/avg_num_step_conf": 2.63671875, "calib/ece": 0.29737051792828684, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.33067729083665337, "calib/gap": 0.0371346779440469, "calib/mean_conf": 0.8674900398406374, "calib/mu_c": 0.8831724137931035, "calib/mu_w": 0.8460377358490566, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.2935856573705179, "calib/std_conf": 0.10301275836929619, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7889066666666666, "calib/step_q_c_n": 375.0, "calib/step_q_gap": 0.05550666666666659, "calib/step_q_w": 0.7334, "calib/step_q_w_n": 300.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 474.73828125, "completions/mean_terminated_length": 476.60003662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.05226666666666667, "grad_norm": 0.007054316811263561, "kl": 0.032970428466796875, "learning_rate": 4.194444444444445e-06, "loss": 0.0348, "num_tokens": 11373142.0, "reward": 0.30179956555366516, "reward_std": 0.1905430108308792, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.657378077507019, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.3600289821624756, "step": 49 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5599271012006861, "calib/avg_num_step_conf": 2.3203125, "calib/ece": 0.24437246963562745, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.3360323886639676, "calib/gap": 0.021552315608919348, "calib/mean_conf": 0.88251012145749, "calib/mu_c": 0.890188679245283, "calib/mu_w": 0.8686363636363637, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.24157894736842098, "calib/std_conf": 0.07987955824040092, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.8135911602209945, "calib/step_q_c_n": 362.0, "calib/step_q_gap": 0.0036342636692704433, "calib/step_q_w": 0.8099568965517241, "calib/step_q_w_n": 232.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2032.0, "completions/max_terminated_length": 2032.0, "completions/mean_length": 500.54296875, "completions/mean_terminated_length": 504.4842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.05333333333333334, "grad_norm": 0.006964292377233505, "kl": 0.030660629272460938, "learning_rate": 4.166666666666667e-06, "loss": 0.0509, "num_tokens": 11606641.0, "reward": 0.3266167640686035, "reward_std": 0.21096104383468628, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6699769496917725, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.32924342155456543, "step": 50 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5441904761904762, "calib/avg_num_step_conf": 2.4921875, "calib/ece": 0.2988235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.43137254901960786, "calib/gap": 0.03331428571428563, "calib/mean_conf": 0.8698823529411764, "calib/mu_c": 0.8835999999999999, "calib/mu_w": 0.8502857142857143, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.29023529411764704, "calib/std_conf": 0.13659868903266995, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8239673913043479, "calib/step_q_c_n": 368.0, "calib/step_q_gap": 0.0506340579710145, "calib/step_q_w": 0.7733333333333334, "calib/step_q_w_n": 270.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 480.13671875, "completions/mean_terminated_length": 480.13671875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.0544, "grad_norm": 0.007857617922127247, "kl": 0.0311737060546875, "learning_rate": 4.138888888888889e-06, "loss": 0.021, "num_tokens": 11838852.0, "reward": 0.3062852919101715, "reward_std": 0.232842355966568, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.67255699634552, "rewards/format_reward_step": 0.9921875, "rewards/step_l1_reward": -0.37561148405075073, "step": 51 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6458100558659218, "calib/avg_num_step_conf": 2.4453125, "calib/ece": 0.13555118110236228, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.35826771653543305, "calib/gap": 0.13500111731843578, "calib/mean_conf": 0.8263385826771653, "calib/mu_c": 0.8662011173184359, "calib/mu_w": 0.7312000000000001, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.1285826771653544, "calib/std_conf": 0.18965047504450225, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.800990099009901, "calib/step_q_c_n": 404.0, "calib/step_q_gap": 0.18676036928017137, "calib/step_q_w": 0.6142297297297297, "calib/step_q_w_n": 222.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2595.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 473.12109375, "completions/mean_terminated_length": 473.12109375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.055466666666666664, "grad_norm": 0.0077812341041862965, "kl": 0.03348541259765625, "learning_rate": 4.111111111111111e-06, "loss": 0.0046, "num_tokens": 12067923.0, "reward": 0.41235560178756714, "reward_std": 0.2180776596069336, "rewards/accuracy_reward_step": 0.69921875, "rewards/final_brier_reward_step": 0.7757226228713989, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.286167711019516, "step": 52 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6079401925308937, "calib/avg_num_step_conf": 2.609375, "calib/ece": 0.23512096774193553, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.3346774193548387, "calib/gap": 0.07823649894176288, "calib/mean_conf": 0.8342338709677419, "calib/mu_c": 0.8648344370860928, "calib/mu_w": 0.7865979381443299, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.230241935483871, "calib/std_conf": 0.1661525537294629, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8075830815709968, "calib/step_q_c_n": 331.0, "calib/step_q_gap": 0.21452670174903832, "calib/step_q_w": 0.5930563798219585, "calib/step_q_w_n": 337.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 509.87109375, "completions/mean_terminated_length": 515.9169921875, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.05653333333333333, "grad_norm": 0.007624692749232054, "kl": 0.034870147705078125, "learning_rate": 4.083333333333334e-06, "loss": -0.0483, "num_tokens": 12304274.0, "reward": 0.34297817945480347, "reward_std": 0.19570030272006989, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6943316459655762, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.31931272149086, "step": 53 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7167325428194995, "calib/avg_num_step_conf": 2.3203125, "calib/ece": 0.1405859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.4375, "calib/gap": 0.12296597690459599, "calib/mean_conf": 0.8660546874999999, "calib/mu_c": 0.8991978609625669, "calib/mu_w": 0.7762318840579709, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1380859375, "calib/std_conf": 0.13993462807424523, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8436574074074074, "calib/step_q_c_n": 432.0, "calib/step_q_gap": 0.10569444444444431, "calib/step_q_w": 0.737962962962963, "calib/step_q_w_n": 162.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1246.0, "completions/max_terminated_length": 1246.0, "completions/mean_length": 410.34375, "completions/mean_terminated_length": 411.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.0576, "grad_norm": 0.013864198699593544, "kl": 0.08069229125976562, "learning_rate": 4.055555555555556e-06, "loss": 0.0006, "num_tokens": 12515554.0, "reward": 0.4472993016242981, "reward_std": 0.17225967347621918, "rewards/accuracy_reward_step": 0.73046875, "rewards/final_brier_reward_step": 0.8135707378387451, "rewards/format_reward_step": 1.0, "rewards/step_l1_reward": -0.26506587862968445, "step": 54 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7380197768762677, "calib/avg_num_step_conf": 2.16015625, "calib/ece": 0.3099603174603175, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.49603174603174605, "calib/gap": 0.14150354969574064, "calib/mean_conf": 0.849642857142857, "calib/mu_c": 0.9147794117647059, "calib/mu_w": 0.7732758620689653, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.3099603174603175, "calib/std_conf": 0.1763346819772764, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8392075471698113, "calib/step_q_c_n": 265.0, "calib/step_q_gap": 0.1505964360587002, "calib/step_q_w": 0.6886111111111111, "calib/step_q_w_n": 288.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2555.0, "completions/max_terminated_length": 2555.0, "completions/mean_length": 448.0078125, "completions/mean_terminated_length": 453.3201904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.058666666666666666, "grad_norm": 0.007659987546503544, "kl": 0.03458213806152344, "learning_rate": 4.027777777777779e-06, "loss": -0.0013, "num_tokens": 12738068.0, "reward": 0.3213605284690857, "reward_std": 0.23816221952438354, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6774871349334717, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.3347660303115845, "step": 55 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6091312533620226, "calib/avg_num_step_conf": 2.03515625, "calib/ece": 0.2891902834008097, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.4291497975708502, "calib/gap": 0.05400349650349667, "calib/mean_conf": 0.8345344129554656, "calib/mu_c": 0.8572727272727274, "calib/mu_w": 0.8032692307692307, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.2723886639676113, "calib/std_conf": 0.18332203758283339, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.7831118881118881, "calib/step_q_c_n": 286.0, "calib/step_q_gap": 0.06847359023954769, "calib/step_q_w": 0.7146382978723405, "calib/step_q_w_n": 235.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 477.89453125, "completions/mean_terminated_length": 479.7686462402344, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.05973333333333333, "grad_norm": 0.007571075111627579, "kl": 0.034942626953125, "learning_rate": 4.000000000000001e-06, "loss": 0.0369, "num_tokens": 12967249.0, "reward": 0.322052001953125, "reward_std": 0.19588708877563477, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6582722663879395, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.3172932267189026, "step": 56 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6391849529780563, "calib/avg_num_step_conf": 1.97265625, "calib/ece": 0.2246428571428572, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.4801587301587302, "calib/gap": 0.07466875653082572, "calib/mean_conf": 0.8767063492063492, "calib/mu_c": 0.9024848484848487, "calib/mu_w": 0.827816091954023, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.22329365079365082, "calib/std_conf": 0.13108167618970798, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.796345029239766, "calib/step_q_c_n": 342.0, "calib/step_q_gap": 0.053768342123201696, "calib/step_q_w": 0.7425766871165643, "calib/step_q_w_n": 163.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2477.0, "completions/max_terminated_length": 2477.0, "completions/mean_length": 454.72265625, "completions/mean_terminated_length": 456.50592041015625, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.0608, "grad_norm": 0.00668899342417717, "kl": 0.035167694091796875, "learning_rate": 3.972222222222223e-06, "loss": 0.0248, "num_tokens": 13190450.0, "reward": 0.36799943447113037, "reward_std": 0.23135200142860413, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7266637086868286, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.3148835003376007, "step": 57 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.573910719059075, "calib/avg_num_step_conf": 2.69921875, "calib/ece": 0.33363265306122447, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.5265306122448979, "calib/gap": 0.056478882651697426, "calib/mean_conf": 0.8477551020408163, "calib/mu_c": 0.8744961240310077, "calib/mu_w": 0.8180172413793103, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.3274285714285714, "calib/std_conf": 0.18628555330977323, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.6834943181818182, "calib/step_q_c_n": 352.0, "calib/step_q_gap": 0.05476275475998926, "calib/step_q_w": 0.6287315634218289, "calib/step_q_w_n": 339.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 575.109375, "completions/mean_terminated_length": 575.109375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.06186666666666667, "grad_norm": 0.0061858044937253, "kl": 0.03353118896484375, "learning_rate": 3.944444444444445e-06, "loss": -0.0023, "num_tokens": 13443998.0, "reward": 0.26048970222473145, "reward_std": 0.2409365326166153, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.609772264957428, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.3801991641521454, "step": 58 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5171932495876157, "calib/avg_num_step_conf": 1.828125, "calib/ece": 0.2977470355731226, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.541501976284585, "calib/gap": 0.04947151376728853, "calib/mean_conf": 0.844703557312253, "calib/mu_c": 0.8664084507042255, "calib/mu_w": 0.8169369369369369, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2905928853754941, "calib/std_conf": 0.1965684139701614, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7663235294117647, "calib/step_q_c_n": 272.0, "calib/step_q_gap": 0.009486794717887048, "calib/step_q_w": 0.7568367346938777, "calib/step_q_w_n": 196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2300.0, "completions/max_terminated_length": 2300.0, "completions/mean_length": 478.53125, "completions/mean_terminated_length": 480.4078674316406, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.06293333333333333, "grad_norm": 0.006281663663685322, "kl": 0.037445068359375, "learning_rate": 3.916666666666667e-06, "loss": 0.0161, "num_tokens": 13672750.0, "reward": 0.2808471918106079, "reward_std": 0.2642785310745239, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6390796899795532, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.38285407423973083, "step": 59 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5813658796852074, "calib/avg_num_step_conf": 1.953125, "calib/ece": 0.272570281124498, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.5020080321285141, "calib/gap": 0.06716086434573842, "calib/mean_conf": 0.8275903614457831, "calib/mu_c": 0.8551020408163266, "calib/mu_w": 0.7879411764705881, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.2548995983935743, "calib/std_conf": 0.2103253763176534, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.77, "calib/step_q_c_n": 290.0, "calib/step_q_gap": 0.09714285714285709, "calib/step_q_w": 0.6728571428571429, "calib/step_q_w_n": 210.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2951.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 487.67578125, "completions/mean_terminated_length": 487.67578125, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.064, "grad_norm": 0.006782458629459143, "kl": 0.03938865661621094, "learning_rate": 3.88888888888889e-06, "loss": 0.0381, "num_tokens": 13906451.0, "reward": 0.3251803517341614, "reward_std": 0.2544691264629364, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6601210832595825, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.3152291178703308, "step": 60 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5899904083310497, "calib/avg_num_step_conf": 2.1328125, "calib/ece": 0.24699604743083, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.5612648221343873, "calib/gap": 0.06016237325294593, "calib/mean_conf": 0.8630434782608696, "calib/mu_c": 0.8842073170731707, "calib/mu_w": 0.8240449438202248, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.23090909090909087, "calib/std_conf": 0.17829402950040316, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.7908571428571429, "calib/step_q_c_n": 315.0, "calib/step_q_gap": 0.1303809523809525, "calib/step_q_w": 0.6604761904761904, "calib/step_q_w_n": 231.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3000.0, "completions/max_terminated_length": 3000.0, "completions/mean_length": 427.28515625, "completions/mean_terminated_length": 427.28515625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.06506666666666666, "grad_norm": 0.006563232745975256, "kl": 0.04006195068359375, "learning_rate": 3.861111111111112e-06, "loss": 0.0374, "num_tokens": 14119900.0, "reward": 0.3624458909034729, "reward_std": 0.22378845512866974, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7091094255447388, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.309217631816864, "step": 61 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49980106100795746, "calib/avg_num_step_conf": 1.94921875, "calib/ece": 0.30947791164658645, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.5341365461847389, "calib/gap": 0.02925265251989395, "calib/mean_conf": 0.8130923694779115, "calib/mu_c": 0.8253103448275861, "calib/mu_w": 0.7960576923076922, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.270120481927711, "calib/std_conf": 0.23478879576435055, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.7263306451612904, "calib/step_q_c_n": 248.0, "calib/step_q_gap": 0.14445813520113104, "calib/step_q_w": 0.5818725099601594, "calib/step_q_w_n": 251.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2698.0, "completions/max_terminated_length": 2698.0, "completions/mean_length": 479.015625, "completions/mean_terminated_length": 480.8941345214844, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.06613333333333334, "grad_norm": 0.005922415293753147, "kl": 0.038631439208984375, "learning_rate": 3.833333333333334e-06, "loss": 0.0266, "num_tokens": 14349608.0, "reward": 0.2788121998310089, "reward_std": 0.2546740770339966, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6321789026260376, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.3792419731616974, "step": 62 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.669293903074518, "calib/avg_num_step_conf": 1.74609375, "calib/ece": 0.20695652173913032, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.45454545454545453, "calib/gap": 0.17424895779051597, "calib/mean_conf": 0.78300395256917, "calib/mu_c": 0.8525657894736842, "calib/mu_w": 0.6783168316831683, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.19458498023715404, "calib/std_conf": 0.25197807950999856, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.810561797752809, "calib/step_q_c_n": 267.0, "calib/step_q_gap": 0.2016729088639202, "calib/step_q_w": 0.6088888888888888, "calib/step_q_w_n": 180.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1708.0, "completions/max_terminated_length": 1708.0, "completions/mean_length": 493.328125, "completions/mean_terminated_length": 495.2627868652344, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.0672, "grad_norm": 0.0066199833527207375, "kl": 0.03546905517578125, "learning_rate": 3.8055555555555556e-06, "loss": 0.0361, "num_tokens": 14584540.0, "reward": 0.3798148036003113, "reward_std": 0.22585511207580566, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.7342566251754761, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.2894707918167114, "step": 63 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5892217630853994, "calib/avg_num_step_conf": 1.8359375, "calib/ece": 0.2399999999999999, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.3557312252964427, "calib/gap": 0.04540151515151514, "calib/mean_conf": 0.7369960474308299, "calib/mu_c": 0.7527878787878788, "calib/mu_w": 0.7073863636363636, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.16241106719367582, "calib/std_conf": 0.2602956436799597, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7380622837370242, "calib/step_q_c_n": 289.0, "calib/step_q_gap": 0.06750979754917885, "calib/step_q_w": 0.6705524861878454, "calib/step_q_w_n": 181.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2814.0, "completions/max_terminated_length": 2814.0, "completions/mean_length": 459.734375, "completions/mean_terminated_length": 461.53729248046875, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.06826666666666667, "grad_norm": 0.006584564223885536, "kl": 0.0449371337890625, "learning_rate": 3.777777777777778e-06, "loss": 0.033, "num_tokens": 14806008.0, "reward": 0.34331193566322327, "reward_std": 0.22489362955093384, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7015784978866577, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.33917340636253357, "step": 64 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6740588421385638, "calib/avg_num_step_conf": 1.97265625, "calib/ece": 0.27251968503937, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.41338582677165353, "calib/gap": 0.10797595697564055, "calib/mean_conf": 0.8211811023622048, "calib/mu_c": 0.8675172413793103, "calib/mu_w": 0.7595412844036697, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.26141732283464564, "calib/std_conf": 0.19436192464861915, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8445418326693228, "calib/step_q_c_n": 251.0, "calib/step_q_gap": 0.12745521849609442, "calib/step_q_w": 0.7170866141732284, "calib/step_q_w_n": 254.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1166.0, "completions/max_terminated_length": 1166.0, "completions/mean_length": 392.15234375, "completions/mean_terminated_length": 393.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.06933333333333333, "grad_norm": 0.0067816670052707195, "kl": 0.055877685546875, "learning_rate": 3.7500000000000005e-06, "loss": -0.0072, "num_tokens": 15011423.0, "reward": 0.32644250988960266, "reward_std": 0.17547816038131714, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6920089721679688, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.34849900007247925, "step": 65 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6454637096774194, "calib/avg_num_step_conf": 2.078125, "calib/ece": 0.3492213114754099, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.4180327868852459, "calib/gap": 0.0714999999999999, "calib/mean_conf": 0.8076639344262295, "calib/mu_c": 0.844, "calib/mu_w": 0.7725000000000001, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.3325409836065575, "calib/std_conf": 0.20432183351295097, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.7880275229357798, "calib/step_q_c_n": 218.0, "calib/step_q_gap": 0.15923771401858233, "calib/step_q_w": 0.6287898089171975, "calib/step_q_w_n": 314.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2553.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 500.94921875, "completions/mean_terminated_length": 512.9720458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.0704, "grad_norm": 0.006632333155721426, "kl": 0.05127716064453125, "learning_rate": 3.7222222222222225e-06, "loss": -0.0107, "num_tokens": 15246018.0, "reward": 0.2660596966743469, "reward_std": 0.21161139011383057, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6064273118972778, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.35633915662765503, "step": 66 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7607763023493359, "calib/avg_num_step_conf": 1.8359375, "calib/ece": 0.2023228346456694, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4251968503937008, "calib/gap": 0.13877494041539007, "calib/mean_conf": 0.8276771653543307, "calib/mu_c": 0.8763030303030305, "calib/mu_w": 0.7375280898876404, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19019685039370088, "calib/std_conf": 0.18565620508470213, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.858125, "calib/step_q_c_n": 288.0, "calib/step_q_gap": 0.1948282967032967, "calib/step_q_w": 0.6632967032967033, "calib/step_q_w_n": 182.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1531.0, "completions/max_terminated_length": 1531.0, "completions/mean_length": 442.3515625, "completions/mean_terminated_length": 444.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.07146666666666666, "grad_norm": 0.007070634979754686, "kl": 0.04443359375, "learning_rate": 3.694444444444445e-06, "loss": 0.0114, "num_tokens": 15464268.0, "reward": 0.40109509229660034, "reward_std": 0.18799439072608948, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7620258331298828, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.28639817237854004, "step": 67 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6540259740259741, "calib/avg_num_step_conf": 2.21484375, "calib/ece": 0.2645600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.472, "calib/gap": 0.11399350649350659, "calib/mean_conf": 0.8212, "calib/mu_c": 0.8713571428571429, "calib/mu_w": 0.7573636363636363, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.26288000000000006, "calib/std_conf": 0.20073903456976172, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8311111111111111, "calib/step_q_c_n": 252.0, "calib/step_q_gap": 0.22873015873015878, "calib/step_q_w": 0.6023809523809524, "calib/step_q_w_n": 315.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 439.55078125, "completions/mean_terminated_length": 441.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.07253333333333334, "grad_norm": 0.007071278523653746, "kl": 0.042934417724609375, "learning_rate": 3.6666666666666666e-06, "loss": -0.0096, "num_tokens": 15680881.0, "reward": 0.33351147174835205, "reward_std": 0.19385266304016113, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6848187446594238, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.3224833011627197, "step": 68 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6231396534148828, "calib/avg_num_step_conf": 1.96875, "calib/ece": 0.2413524590163934, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.3237704918032787, "calib/gap": 0.0887210329595649, "calib/mean_conf": 0.7649590163934428, "calib/mu_c": 0.8045925925925925, "calib/mu_w": 0.7158715596330276, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.22651639344262292, "calib/std_conf": 0.2210509509378051, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.7756903765690376, "calib/step_q_c_n": 239.0, "calib/step_q_gap": 0.13474698034262256, "calib/step_q_w": 0.640943396226415, "calib/step_q_w_n": 265.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2564.0, "completions/max_terminated_length": 2564.0, "completions/mean_length": 544.4296875, "completions/mean_terminated_length": 557.4960327148438, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.0736, "grad_norm": 0.005974503234028816, "kl": 0.03606414794921875, "learning_rate": 3.638888888888889e-06, "loss": 0.0062, "num_tokens": 15924751.0, "reward": 0.31283414363861084, "reward_std": 0.23490868508815765, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6655257940292358, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.334388792514801, "step": 69 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.7731869014933369, "calib/avg_num_step_conf": 1.828125, "calib/ece": 0.19134146341463423, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.36585365853658536, "calib/gap": 0.26039576776267326, "calib/mean_conf": 0.7427235772357723, "calib/mu_c": 0.8581021897810219, "calib/mu_w": 0.5977064220183487, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.1885772357723578, "calib/std_conf": 0.27380542616123127, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.8553478260869566, "calib/step_q_c_n": 230.0, "calib/step_q_gap": 0.2660621118012423, "calib/step_q_w": 0.5892857142857143, "calib/step_q_w_n": 238.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 545.76953125, "completions/mean_terminated_length": 547.9098510742188, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.07466666666666667, "grad_norm": 0.027560407295823097, "kl": 0.07869338989257812, "learning_rate": 3.6111111111111115e-06, "loss": 0.035, "num_tokens": 16171460.0, "reward": 0.37544897198677063, "reward_std": 0.20757484436035156, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7419331669807434, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.2894727289676666, "step": 70 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6098376623376623, "calib/avg_num_step_conf": 1.66796875, "calib/ece": 0.2484251968503938, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.4330708661417323, "calib/gap": 0.0649558441558441, "calib/mean_conf": 0.7885826771653544, "calib/mu_c": 0.8141558441558441, "calib/mu_w": 0.7492, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2153543307086615, "calib/std_conf": 0.23665724711765163, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.803585657370518, "calib/step_q_c_n": 251.0, "calib/step_q_gap": 0.08364247555233617, "calib/step_q_w": 0.7199431818181818, "calib/step_q_w_n": 176.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1757.0, "completions/max_terminated_length": 1757.0, "completions/mean_length": 449.4296875, "completions/mean_terminated_length": 451.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.07573333333333333, "grad_norm": 0.5438715815544128, "kl": 1.3751029968261719, "learning_rate": 3.5833333333333335e-06, "loss": 0.0428, "num_tokens": 16390922.0, "reward": 0.32291221618652344, "reward_std": 0.2514145076274872, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6859527230262756, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.3557532727718353, "step": 71 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7759275674306698, "calib/avg_num_step_conf": 1.671875, "calib/ece": 0.2571764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.45098039215686275, "calib/gap": 0.14297581360010092, "calib/mean_conf": 0.8178823529411765, "calib/mu_c": 0.8773154362416106, "calib/mu_w": 0.7343396226415096, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2453725490196078, "calib/std_conf": 0.19438849791063734, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.835529411764706, "calib/step_q_c_n": 255.0, "calib/step_q_gap": 0.09801496089765405, "calib/step_q_w": 0.7375144508670519, "calib/step_q_w_n": 173.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1742.0, "completions/max_terminated_length": 1742.0, "completions/mean_length": 419.8828125, "completions/mean_terminated_length": 419.8828125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.0768, "grad_norm": 0.006729037035256624, "kl": 0.05239105224609375, "learning_rate": 3.555555555555556e-06, "loss": 0.0324, "num_tokens": 16602820.0, "reward": 0.37682515382766724, "reward_std": 0.1965407133102417, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7313547134399414, "rewards/format_reward_step": 0.99609375, "rewards/step_l1_reward": -0.2933293282985687, "step": 72 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7297959183673469, "calib/avg_num_step_conf": 1.61328125, "calib/ece": 0.14845238095238103, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.47619047619047616, "calib/gap": 0.1524311688311687, "calib/mean_conf": 0.8398809523809524, "calib/mu_c": 0.8864571428571428, "calib/mu_w": 0.7340259740259741, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.14694444444444454, "calib/std_conf": 0.17984005712055565, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8746739130434783, "calib/step_q_c_n": 276.0, "calib/step_q_gap": 0.10839654078070471, "calib/step_q_w": 0.7662773722627736, "calib/step_q_w_n": 137.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2847.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 430.890625, "completions/mean_terminated_length": 430.890625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.07786666666666667, "grad_norm": 0.006531127728521824, "kl": 0.050022125244140625, "learning_rate": 3.5277777777777784e-06, "loss": 0.0332, "num_tokens": 16820160.0, "reward": 0.43554988503456116, "reward_std": 0.2307089865207672, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.7826601266860962, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.2443729043006897, "step": 73 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6947563025210084, "calib/avg_num_step_conf": 1.671875, "calib/ece": 0.28581967213114756, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.47950819672131145, "calib/gap": 0.15664806722689073, "calib/mean_conf": 0.7800819672131148, "calib/mu_c": 0.85648, "calib/mu_w": 0.6998319327731093, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.2768032786885246, "calib/std_conf": 0.24842288080530242, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.8335960591133006, "calib/step_q_c_n": 203.0, "calib/step_q_gap": 0.17937383689107844, "calib/step_q_w": 0.6542222222222221, "calib/step_q_w_n": 225.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2688.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 470.27734375, "completions/mean_terminated_length": 473.9803161621094, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.07893333333333333, "grad_norm": 0.006616615690290928, "kl": 0.0474090576171875, "learning_rate": 3.5e-06, "loss": 0.0049, "num_tokens": 17044479.0, "reward": 0.3096698522567749, "reward_std": 0.24875548481941223, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6624257564544678, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.3313673734664917, "step": 74 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7571277289019225, "calib/avg_num_step_conf": 1.49609375, "calib/ece": 0.12769841269841273, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.5992063492063492, "calib/gap": 0.18189638318670576, "calib/mean_conf": 0.8603174603174604, "calib/mu_c": 0.9079569892473118, "calib/mu_w": 0.7260606060606061, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.12496031746031748, "calib/std_conf": 0.17724718865362435, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.9028679245283019, "calib/step_q_c_n": 265.0, "calib/step_q_gap": 0.13634250079948818, "calib/step_q_w": 0.7665254237288137, "calib/step_q_w_n": 118.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2414.0, "completions/max_terminated_length": 2414.0, "completions/mean_length": 379.8828125, "completions/mean_terminated_length": 381.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.08, "grad_norm": 0.006709123495966196, "kl": 0.06363296508789062, "learning_rate": 3.4722222222222224e-06, "loss": 0.0133, "num_tokens": 17246481.0, "reward": 0.4585932791233063, "reward_std": 0.15374954044818878, "rewards/accuracy_reward_step": 0.7265625, "rewards/final_brier_reward_step": 0.7987093925476074, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.21902284026145935, "step": 75 }, { "calib/answer_extract_rate": 1.0, "calib/auroc": 0.7128181336161188, "calib/avg_num_step_conf": 1.6484375, "calib/ece": 0.19640625000000012, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.53125, "calib/gap": 0.13993107104984082, "calib/mean_conf": 0.807578125, "calib/mu_c": 0.8578658536585366, "calib/mu_w": 0.7179347826086958, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18167968750000008, "calib/std_conf": 0.2271485819270822, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.8227916666666668, "calib/step_q_c_n": 240.0, "calib/step_q_gap": 0.14850595238095254, "calib/step_q_w": 0.6742857142857143, "calib/step_q_w_n": 182.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1130.0, "completions/max_terminated_length": 1130.0, "completions/mean_length": 403.10546875, "completions/mean_terminated_length": 404.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.08106666666666666, "grad_norm": 0.0064852419309318066, "kl": 0.06375885009765625, "learning_rate": 3.444444444444445e-06, "loss": -0.0124, "num_tokens": 17452732.0, "reward": 0.38530316948890686, "reward_std": 0.22022101283073425, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7547367215156555, "rewards/format_reward_step": 1.0, "rewards/step_l1_reward": -0.3122553825378418, "step": 76 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6192792545902988, "calib/avg_num_step_conf": 1.8515625, "calib/ece": 0.2002371541501976, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.4505928853754941, "calib/gap": 0.09472663743491372, "calib/mean_conf": 0.7932015810276679, "calib/mu_c": 0.8265243902439026, "calib/mu_w": 0.7317977528089888, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.17260869565217385, "calib/std_conf": 0.22309985285814607, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8138545454545455, "calib/step_q_c_n": 275.0, "calib/step_q_gap": 0.19908067610781177, "calib/step_q_w": 0.6147738693467337, "calib/step_q_w_n": 199.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2043.0, "completions/max_terminated_length": 2043.0, "completions/mean_length": 425.1484375, "completions/mean_terminated_length": 426.8157043457031, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.08213333333333334, "grad_norm": 0.006517153698951006, "kl": 0.0614471435546875, "learning_rate": 3.416666666666667e-06, "loss": 0.0051, "num_tokens": 17666234.0, "reward": 0.37873509526252747, "reward_std": 0.2252240777015686, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7295206785202026, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.2962692677974701, "step": 77 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6582236842105265, "calib/avg_num_step_conf": 1.60546875, "calib/ece": 0.19643137254901955, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.5450980392156862, "calib/gap": 0.16037828947368415, "calib/mean_conf": 0.8103137254901961, "calib/mu_c": 0.8700624999999998, "calib/mu_w": 0.7096842105263157, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18964705882352934, "calib/std_conf": 0.23157918987715614, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8385714285714285, "calib/step_q_c_n": 238.0, "calib/step_q_gap": 0.17065235342691976, "calib/step_q_w": 0.6679190751445088, "calib/step_q_w_n": 173.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2206.0, "completions/max_terminated_length": 2206.0, "completions/mean_length": 451.640625, "completions/mean_terminated_length": 451.640625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.0832, "grad_norm": 0.0056913201697170734, "kl": 0.05938720703125, "learning_rate": 3.3888888888888893e-06, "loss": 0.0458, "num_tokens": 17889878.0, "reward": 0.3906322121620178, "reward_std": 0.22954101860523224, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7474750280380249, "rewards/format_reward_step": 0.9921875, "rewards/step_l1_reward": -0.28964805603027344, "step": 78 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7005917159763314, "calib/avg_num_step_conf": 1.52734375, "calib/ece": 0.16334645669291337, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.515748031496063, "calib/gap": 0.15910824921684652, "calib/mean_conf": 0.8051574803149606, "calib/mu_c": 0.8584023668639053, "calib/mu_w": 0.6992941176470587, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1515748031496063, "calib/std_conf": 0.2309117814025555, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8237051792828686, "calib/step_q_c_n": 251.0, "calib/step_q_gap": 0.13899089356858274, "calib/step_q_w": 0.6847142857142858, "calib/step_q_w_n": 140.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1767.0, "completions/max_terminated_length": 1767.0, "completions/mean_length": 427.8359375, "completions/mean_terminated_length": 429.5137634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.08426666666666667, "grad_norm": 0.005721926223486662, "kl": 0.05712890625, "learning_rate": 3.3611111111111117e-06, "loss": -0.0182, "num_tokens": 18105780.0, "reward": 0.4099389910697937, "reward_std": 0.20847250521183014, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.769273042678833, "rewards/format_reward_step": 0.9921875, "rewards/step_l1_reward": -0.2798638343811035, "step": 79 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7394636015325671, "calib/avg_num_step_conf": 1.46484375, "calib/ece": 0.1962352941176469, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.6235294117647059, "calib/gap": 0.15399106002554308, "calib/mean_conf": 0.8647058823529412, "calib/mu_c": 0.9136206896551726, "calib/mu_w": 0.7596296296296295, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.18929411764705867, "calib/std_conf": 0.19809636139205197, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8561632653061224, "calib/step_q_c_n": 245.0, "calib/step_q_gap": 0.1252401883830454, "calib/step_q_w": 0.730923076923077, "calib/step_q_w_n": 130.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1303.0, "completions/max_terminated_length": 1303.0, "completions/mean_length": 368.0234375, "completions/mean_terminated_length": 369.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.08533333333333333, "grad_norm": 0.007674939930438995, "kl": 0.07657623291015625, "learning_rate": 3.3333333333333333e-06, "loss": 0.0131, "num_tokens": 18302154.0, "reward": 0.4051653742790222, "reward_std": 0.23084446787834167, "rewards/accuracy_reward_step": 0.6796875, "rewards/final_brier_reward_step": 0.7694988250732422, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.29276180267333984, "step": 80 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7535815673070575, "calib/avg_num_step_conf": 1.4453125, "calib/ece": 0.198531746031746, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.5476190476190477, "calib/gap": 0.23549019607843125, "calib/mean_conf": 0.7851984126984127, "calib/mu_c": 0.8777124183006535, "calib/mu_w": 0.6422222222222222, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.1882936507936508, "calib/std_conf": 0.27287337955051927, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8794565217391304, "calib/step_q_c_n": 184.0, "calib/step_q_gap": 0.24569308087891528, "calib/step_q_w": 0.6337634408602151, "calib/step_q_w_n": 186.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 430.89453125, "completions/mean_terminated_length": 432.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.0864, "grad_norm": 0.00645162258297205, "kl": 0.06568145751953125, "learning_rate": 3.3055555555555558e-06, "loss": 0.0158, "num_tokens": 18518711.0, "reward": 0.39590904116630554, "reward_std": 0.24231690168380737, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7513003349304199, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.2743260860443115, "step": 81 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7049266247379455, "calib/avg_num_step_conf": 1.7734375, "calib/ece": 0.20907630522088347, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.6224899598393574, "calib/gap": 0.18125157232704392, "calib/mean_conf": 0.8314056224899599, "calib/mu_c": 0.8969182389937106, "calib/mu_w": 0.7156666666666667, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20096385542168668, "calib/std_conf": 0.2432742517711509, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.8766331658291456, "calib/step_q_c_n": 199.0, "calib/step_q_gap": 0.39949591092718484, "calib/step_q_w": 0.4771372549019608, "calib/step_q_w_n": 255.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2861.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 393.23046875, "completions/mean_terminated_length": 397.893310546875, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.08746666666666666, "grad_norm": 0.006193524692207575, "kl": 0.066192626953125, "learning_rate": 3.277777777777778e-06, "loss": 0.0215, "num_tokens": 18724930.0, "reward": 0.38317593932151794, "reward_std": 0.25840771198272705, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7358046770095825, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.28820282220840454, "step": 82 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7366818873668187, "calib/avg_num_step_conf": 1.34375, "calib/ece": 0.27661417322834636, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.6496062992125984, "calib/gap": 0.19511288685946238, "calib/mean_conf": 0.817244094488189, "calib/mu_c": 0.9002054794520549, "calib/mu_w": 0.7050925925925925, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.25952755905511804, "calib/std_conf": 0.27007341424008796, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8611235955056181, "calib/step_q_c_n": 178.0, "calib/step_q_gap": 0.16425612562610004, "calib/step_q_w": 0.6968674698795181, "calib/step_q_w_n": 166.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2342.0, "completions/max_terminated_length": 2342.0, "completions/mean_length": 458.265625, "completions/mean_terminated_length": 458.265625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.08853333333333334, "grad_norm": 0.005535097327083349, "kl": 0.06232452392578125, "learning_rate": 3.2500000000000002e-06, "loss": 0.0012, "num_tokens": 18949510.0, "reward": 0.3472113311290741, "reward_std": 0.2140897512435913, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.704337477684021, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.3208523690700531, "step": 83 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7351, "calib/avg_num_step_conf": 1.625, "calib/ece": 0.22379999999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.604, "calib/gap": 0.2233999999999995, "calib/mean_conf": 0.8156400000000001, "calib/mu_c": 0.9049999999999996, "calib/mu_w": 0.6816000000000001, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.21971999999999994, "calib/std_conf": 0.24825750824496726, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.8542105263157894, "calib/step_q_c_n": 247.0, "calib/step_q_gap": 0.1745655559015883, "calib/step_q_w": 0.6796449704142011, "calib/step_q_w_n": 169.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2751.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 384.80859375, "completions/mean_terminated_length": 387.8385925292969, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.0896, "grad_norm": 0.006911741103976965, "kl": 0.0834808349609375, "learning_rate": 3.2222222222222227e-06, "loss": 0.0439, "num_tokens": 19153941.0, "reward": 0.37587296962738037, "reward_std": 0.2167174518108368, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7413082122802734, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.3020622432231903, "step": 84 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7137548850022423, "calib/avg_num_step_conf": 1.6796875, "calib/ece": 0.28148000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.548, "calib/gap": 0.20196489204945867, "calib/mean_conf": 0.79628, "calib/mu_c": 0.8940310077519381, "calib/mu_w": 0.6920661157024794, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.28088, "calib/std_conf": 0.2644060543936163, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.8198295454545456, "calib/step_q_c_n": 176.0, "calib/step_q_gap": 0.18455395490336446, "calib/step_q_w": 0.6352755905511811, "calib/step_q_w_n": 254.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2637.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 428.75390625, "completions/mean_terminated_length": 433.83795166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.09066666666666667, "grad_norm": 0.006645851768553257, "kl": 0.06461334228515625, "learning_rate": 3.1944444444444443e-06, "loss": -0.0305, "num_tokens": 19371526.0, "reward": 0.3220665454864502, "reward_std": 0.23552703857421875, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6779835820198059, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.3268192708492279, "step": 85 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.734944, "calib/avg_num_step_conf": 1.80859375, "calib/ece": 0.23211999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.464, "calib/gap": 0.2619999999999999, "calib/mean_conf": 0.7321200000000001, "calib/mu_c": 0.8631199999999999, "calib/mu_w": 0.60112, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.23211999999999997, "calib/std_conf": 0.29388825359309617, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8383815028901735, "calib/step_q_c_n": 173.0, "calib/step_q_gap": 0.17045046840741485, "calib/step_q_w": 0.6679310344827587, "calib/step_q_w_n": 290.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2667.0, "completions/max_terminated_length": 2667.0, "completions/mean_length": 433.67578125, "completions/mean_terminated_length": 438.8182067871094, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.09173333333333333, "grad_norm": 0.006340604741126299, "kl": 0.07331085205078125, "learning_rate": 3.1666666666666667e-06, "loss": -0.0278, "num_tokens": 19588059.0, "reward": 0.3461582064628601, "reward_std": 0.222943514585495, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7213422060012817, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.3212132453918457, "step": 86 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6578105493133583, "calib/avg_num_step_conf": 1.4296875, "calib/ece": 0.15968, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.608, "calib/gap": 0.1909269662921348, "calib/mean_conf": 0.81344, "calib/mu_c": 0.8684269662921348, "calib/mu_w": 0.6775, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.13055999999999998, "calib/std_conf": 0.257904180656305, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.8738202247191011, "calib/step_q_c_n": 267.0, "calib/step_q_gap": 0.15685052774940433, "calib/step_q_w": 0.7169696969696968, "calib/step_q_w_n": 99.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 390.3046875, "completions/mean_terminated_length": 394.9328308105469, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.0928, "grad_norm": 0.0069213323295116425, "kl": 0.074493408203125, "learning_rate": 3.138888888888889e-06, "loss": 0.0473, "num_tokens": 19793473.0, "reward": 0.44103753566741943, "reward_std": 0.209333136677742, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7777742147445679, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.23007416725158691, "step": 87 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.7320478723404256, "calib/avg_num_step_conf": 1.54296875, "calib/ece": 0.20110236220472455, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.5866141732283464, "calib/gap": 0.23726728723404245, "calib/mean_conf": 0.8006299212598426, "calib/mu_c": 0.8884375, "calib/mu_w": 0.6511702127659575, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.18590551181102377, "calib/std_conf": 0.26281261259867017, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.8605769230769231, "calib/step_q_c_n": 208.0, "calib/step_q_gap": 0.269721308103661, "calib/step_q_w": 0.5908556149732621, "calib/step_q_w_n": 187.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1747.0, "completions/max_terminated_length": 1747.0, "completions/mean_length": 425.328125, "completions/mean_terminated_length": 426.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.09386666666666667, "grad_norm": 0.010172465816140175, "kl": 0.13602447509765625, "learning_rate": 3.1111111111111116e-06, "loss": 0.0003, "num_tokens": 20012205.0, "reward": 0.4115146994590759, "reward_std": 0.22872042655944824, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7693734169006348, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.2690003216266632, "step": 88 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7624516129032258, "calib/avg_num_step_conf": 1.4375, "calib/ece": 0.27738955823293177, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.4979919678714859, "calib/gap": 0.21976322580645136, "calib/mean_conf": 0.7500803212851407, "calib/mu_c": 0.8604032258064515, "calib/mu_w": 0.6406400000000001, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.2647389558232932, "calib/std_conf": 0.28767869176056377, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.766627906976744, "calib/step_q_c_n": 172.0, "calib/step_q_gap": 0.07586260085429508, "calib/step_q_w": 0.6907653061224489, "calib/step_q_w_n": 196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2629.0, "completions/max_terminated_length": 2629.0, "completions/mean_length": 445.9765625, "completions/mean_terminated_length": 449.4881896972656, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.09493333333333333, "grad_norm": 0.006914828438311815, "kl": 0.09096908569335938, "learning_rate": 3.0833333333333336e-06, "loss": -0.0262, "num_tokens": 20235263.0, "reward": 0.317240834236145, "reward_std": 0.21951505541801453, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.683097243309021, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.33767807483673096, "step": 89 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6417276210379659, "calib/avg_num_step_conf": 1.35546875, "calib/ece": 0.19519841269841268, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.5833333333333334, "calib/gap": 0.17187669801462913, "calib/mean_conf": 0.7947222222222222, "calib/mu_c": 0.8540606060606062, "calib/mu_w": 0.6821839080459771, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.16757936507936505, "calib/std_conf": 0.2754367423383859, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8222072072072072, "calib/step_q_c_n": 222.0, "calib/step_q_gap": 0.15652720720720714, "calib/step_q_w": 0.66568, "calib/step_q_w_n": 125.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1354.0, "completions/max_terminated_length": 1354.0, "completions/mean_length": 394.7734375, "completions/mean_terminated_length": 401.0397033691406, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.096, "grad_norm": 0.005598283838480711, "kl": 0.07326507568359375, "learning_rate": 3.055555555555556e-06, "loss": -0.0349, "num_tokens": 20439645.0, "reward": 0.3961567282676697, "reward_std": 0.22430838644504547, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7443863153457642, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.27785414457321167, "step": 90 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6981297282151863, "calib/avg_num_step_conf": 1.453125, "calib/ece": 0.2087698412698412, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.5873015873015873, "calib/gap": 0.16699215466517248, "calib/mean_conf": 0.8001190476190476, "calib/mu_c": 0.85710843373494, "calib/mu_w": 0.6901162790697675, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.175079365079365, "calib/std_conf": 0.26773370171499444, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.8462385321100918, "calib/step_q_c_n": 218.0, "calib/step_q_gap": 0.20649827236983198, "calib/step_q_w": 0.6397402597402598, "calib/step_q_w_n": 154.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1892.0, "completions/max_terminated_length": 1892.0, "completions/mean_length": 417.90234375, "completions/mean_terminated_length": 421.1929016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.09706666666666666, "grad_norm": 0.005817817524075508, "kl": 0.077728271484375, "learning_rate": 3.0277777777777776e-06, "loss": 0.014, "num_tokens": 20654340.0, "reward": 0.39327478408813477, "reward_std": 0.22805273532867432, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.739641010761261, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.27731022238731384, "step": 91 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6651152043308907, "calib/avg_num_step_conf": 1.33984375, "calib/ece": 0.2482936507936508, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.626984126984127, "calib/gap": 0.14431966726084378, "calib/mean_conf": 0.8218650793650795, "calib/mu_c": 0.878562091503268, "calib/mu_w": 0.7342424242424243, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2315079365079365, "calib/std_conf": 0.2564255470141414, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.8427638190954773, "calib/step_q_c_n": 199.0, "calib/step_q_gap": 0.1665832635399217, "calib/step_q_w": 0.6761805555555556, "calib/step_q_w_n": 144.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 373.69921875, "completions/mean_terminated_length": 378.1304626464844, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.09813333333333334, "grad_norm": 0.0057909623719751835, "kl": 0.08457183837890625, "learning_rate": 3e-06, "loss": -0.0316, "num_tokens": 20856727.0, "reward": 0.36944615840911865, "reward_std": 0.26509976387023926, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6994296908378601, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.2753811180591583, "step": 92 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7377803335250144, "calib/avg_num_step_conf": 1.4765625, "calib/ece": 0.20912698412698408, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.49603174603174605, "calib/gap": 0.2634962622196666, "calib/mean_conf": 0.7483333333333334, "calib/mu_c": 0.8643971631205674, "calib/mu_w": 0.6009009009009008, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.1989682539682539, "calib/std_conf": 0.301340260139518, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8316836734693878, "calib/step_q_c_n": 196.0, "calib/step_q_gap": 0.1358045525902668, "calib/step_q_w": 0.695879120879121, "calib/step_q_w_n": 182.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2485.0, "completions/max_terminated_length": 2485.0, "completions/mean_length": 417.0625, "completions/mean_terminated_length": 418.69805908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.0992, "grad_norm": 0.005857251584529877, "kl": 0.077850341796875, "learning_rate": 2.9722222222222225e-06, "loss": 0.0395, "num_tokens": 21069271.0, "reward": 0.368843674659729, "reward_std": 0.2849258780479431, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7412355542182922, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.3097981810569763, "step": 93 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7032701111837802, "calib/avg_num_step_conf": 1.5234375, "calib/ece": 0.22566265060240964, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.4979919678714859, "calib/gap": 0.21499542184434284, "calib/mean_conf": 0.7471084337349396, "calib/mu_c": 0.8420863309352519, "calib/mu_w": 0.627090909090909, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.2072690763052209, "calib/std_conf": 0.28842821573431987, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8143243243243242, "calib/step_q_c_n": 185.0, "calib/step_q_gap": 0.22725115359261694, "calib/step_q_w": 0.5870731707317073, "calib/step_q_w_n": 205.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2930.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 433.34765625, "completions/mean_terminated_length": 433.34765625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.10026666666666667, "grad_norm": 0.005978456698358059, "kl": 0.07855224609375, "learning_rate": 2.944444444444445e-06, "loss": 0.0582, "num_tokens": 21288888.0, "reward": 0.35172322392463684, "reward_std": 0.24280011653900146, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.7168351411819458, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.315732479095459, "step": 94 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7224184782608696, "calib/avg_num_step_conf": 1.2734375, "calib/ece": 0.1770634920634921, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.5912698412698413, "calib/gap": 0.23824999999999996, "calib/mean_conf": 0.7662698412698412, "calib/mu_c": 0.8532500000000001, "calib/mu_w": 0.6150000000000001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.15420634920634924, "calib/std_conf": 0.3048164530382701, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.8723756906077347, "calib/step_q_c_n": 181.0, "calib/step_q_gap": 0.2783756906077347, "calib/step_q_w": 0.594, "calib/step_q_w_n": 145.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 388.80859375, "completions/mean_terminated_length": 393.4189758300781, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.10133333333333333, "grad_norm": 0.005680699832737446, "kl": 0.0786590576171875, "learning_rate": 2.916666666666667e-06, "loss": 0.0023, "num_tokens": 21494551.0, "reward": 0.4002438187599182, "reward_std": 0.2253013551235199, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.749567985534668, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.2686115801334381, "step": 95 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.8099490795142968, "calib/avg_num_step_conf": 1.375, "calib/ece": 0.12677165354330705, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.6417322834645669, "calib/gap": 0.26587230708969833, "calib/mean_conf": 0.8439370078740158, "calib/mu_c": 0.9161621621621622, "calib/mu_w": 0.6502898550724638, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.1211811023622047, "calib/std_conf": 0.23293964437543868, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.8667782426778241, "calib/step_q_c_n": 239.0, "calib/step_q_gap": 0.24863664975747013, "calib/step_q_w": 0.618141592920354, "calib/step_q_w_n": 113.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 362.4140625, "completions/mean_terminated_length": 363.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.1024, "grad_norm": 0.006683533079922199, "kl": 0.0844573974609375, "learning_rate": 2.888888888888889e-06, "loss": 0.0758, "num_tokens": 21693145.0, "reward": 0.48853859305381775, "reward_std": 0.20125803351402283, "rewards/accuracy_reward_step": 0.72265625, "rewards/final_brier_reward_step": 0.8290726542472839, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.1934017539024353, "step": 96 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6631067961165049, "calib/avg_num_step_conf": 1.5859375, "calib/ece": 0.2184584980237153, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5177865612648221, "calib/gap": 0.17730355987055002, "calib/mean_conf": 0.7696837944664032, "calib/mu_c": 0.8418666666666665, "calib/mu_w": 0.6645631067961165, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.19762845849802363, "calib/std_conf": 0.28143319473200823, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.816930693069307, "calib/step_q_c_n": 202.0, "calib/step_q_gap": 0.19962677150067953, "calib/step_q_w": 0.6173039215686275, "calib/step_q_w_n": 204.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1005.0, "completions/max_terminated_length": 1005.0, "completions/mean_length": 386.8203125, "completions/mean_terminated_length": 389.86614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.10346666666666667, "grad_norm": 1.6125926971435547, "kl": 17.58576202392578, "learning_rate": 2.861111111111111e-06, "loss": 0.1785, "num_tokens": 21897243.0, "reward": 0.35906365513801575, "reward_std": 0.26138195395469666, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.717347264289856, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.3125011920928955, "step": 97 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6870081365879686, "calib/avg_num_step_conf": 1.421875, "calib/ece": 0.2273092369477912, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.5502008032128514, "calib/gap": 0.20064625850340145, "calib/mean_conf": 0.7567871485943775, "calib/mu_c": 0.8389795918367348, "calib/mu_w": 0.6383333333333333, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.1968674698795181, "calib/std_conf": 0.3095405957174121, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.8312499999999999, "calib/step_q_c_n": 192.0, "calib/step_q_gap": 0.20409883720930222, "calib/step_q_w": 0.6271511627906977, "calib/step_q_w_n": 172.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2810.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 436.0, "completions/mean_terminated_length": 439.4330749511719, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.10453333333333334, "grad_norm": 0.005526782479137182, "kl": 0.09055328369140625, "learning_rate": 2.8333333333333335e-06, "loss": 0.0457, "num_tokens": 22115043.0, "reward": 0.36131489276885986, "reward_std": 0.2622639238834381, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7069636583328247, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.29214632511138916, "step": 98 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6965757317419722, "calib/avg_num_step_conf": 1.69140625, "calib/ece": 0.27004081632653065, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.34285714285714286, "calib/gap": 0.214514776925263, "calib/mean_conf": 0.6037551020408163, "calib/mu_c": 0.737717391304348, "calib/mu_w": 0.523202614379085, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.24914285714285717, "calib/std_conf": 0.34016529779051236, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.6817730496453901, "calib/step_q_c_n": 141.0, "calib/step_q_gap": 0.15404907704265047, "calib/step_q_w": 0.5277239726027396, "calib/step_q_w_n": 292.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2496.0, "completions/max_terminated_length": 2496.0, "completions/mean_length": 465.58203125, "completions/mean_terminated_length": 476.7560119628906, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.1056, "grad_norm": 0.005705671850591898, "kl": 0.11505126953125, "learning_rate": 2.805555555555556e-06, "loss": -0.0724, "num_tokens": 22340032.0, "reward": 0.2724548578262329, "reward_std": 0.2788415849208832, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6610808372497559, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.37788987159729004, "step": 99 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7803045090767129, "calib/avg_num_step_conf": 1.73046875, "calib/ece": 0.14891999999999994, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.408, "calib/gap": 0.31491769145682863, "calib/mean_conf": 0.6753199999999999, "calib/mu_c": 0.8126241134751773, "calib/mu_w": 0.49770642201834864, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.13011999999999996, "calib/std_conf": 0.32655182988309833, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.7673272727272726, "calib/step_q_c_n": 220.0, "calib/step_q_gap": 0.28342592743579276, "calib/step_q_w": 0.4839013452914799, "calib/step_q_w_n": 223.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2049.0, "completions/max_terminated_length": 2049.0, "completions/mean_length": 462.1953125, "completions/mean_terminated_length": 464.00787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.10666666666666667, "grad_norm": 0.005212652962654829, "kl": 0.13134765625, "learning_rate": 2.7777777777777783e-06, "loss": 0.0628, "num_tokens": 22565762.0, "reward": 0.3926239013671875, "reward_std": 0.23804575204849243, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7540972828865051, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.27041196823120117, "step": 100 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6799299960375115, "calib/avg_num_step_conf": 1.7890625, "calib/ece": 0.25408906882591087, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.3562753036437247, "calib/gap": 0.1919475630696077, "calib/mean_conf": 0.654008097165992, "calib/mu_c": 0.758141592920354, "calib/mu_w": 0.5661940298507463, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.22530364372469633, "calib/std_conf": 0.33032514949346076, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.6903804347826087, "calib/step_q_c_n": 184.0, "calib/step_q_gap": 0.1653676610599809, "calib/step_q_w": 0.5250127737226278, "calib/step_q_w_n": 274.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 464.21484375, "completions/mean_terminated_length": 469.7193908691406, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.10773333333333333, "grad_norm": 0.005432981997728348, "kl": 0.164337158203125, "learning_rate": 2.7500000000000004e-06, "loss": 0.0066, "num_tokens": 22791593.0, "reward": 0.29547733068466187, "reward_std": 0.2509266138076782, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6622054576873779, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.3485945761203766, "step": 101 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.760089098532495, "calib/avg_num_step_conf": 1.609375, "calib/ece": 0.16680000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.424, "calib/gap": 0.30636399371069184, "calib/mean_conf": 0.6665599999999999, "calib/mu_c": 0.7964583333333334, "calib/mu_w": 0.49009433962264154, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.12868000000000007, "calib/std_conf": 0.3397554508760676, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.7313106796116505, "calib/step_q_c_n": 206.0, "calib/step_q_gap": 0.21928300970873793, "calib/step_q_w": 0.5120276699029126, "calib/step_q_w_n": 206.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2606.0, "completions/max_terminated_length": 2606.0, "completions/mean_length": 379.53515625, "completions/mean_terminated_length": 382.52362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.1088, "grad_norm": 0.0060148392803967, "kl": 0.206390380859375, "learning_rate": 2.7222222222222224e-06, "loss": 0.0235, "num_tokens": 22995450.0, "reward": 0.3910207748413086, "reward_std": 0.21820616722106934, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7527488470077515, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.2753947973251343, "step": 102 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.7216002747252748, "calib/avg_num_step_conf": 1.58984375, "calib/ece": 0.15918032786885244, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.39344262295081966, "calib/gap": 0.27140934065934064, "calib/mean_conf": 0.6452459016393443, "calib/mu_c": 0.7609285714285714, "calib/mu_w": 0.48951923076923076, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.115327868852459, "calib/std_conf": 0.349504832666122, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.7487878787878788, "calib/step_q_c_n": 198.0, "calib/step_q_gap": 0.2991505582137161, "calib/step_q_w": 0.44963732057416267, "calib/step_q_w_n": 209.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3008.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 496.28125, "completions/mean_terminated_length": 504.15875244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.10986666666666667, "grad_norm": 0.004697397816926241, "kl": 0.180694580078125, "learning_rate": 2.6944444444444444e-06, "loss": 0.0238, "num_tokens": 23227050.0, "reward": 0.3566873073577881, "reward_std": 0.24476046860218048, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6928308606147766, "rewards/format_reward_step": 0.90625, "rewards/step_l1_reward": -0.2700812518596649, "step": 103 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.6650138696255201, "calib/avg_num_step_conf": 1.84375, "calib/ece": 0.25530864197530867, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.3168724279835391, "calib/gap": 0.19997642163661578, "calib/mean_conf": 0.5960493827160492, "calib/mu_c": 0.71126213592233, "calib/mu_w": 0.5112857142857142, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.2137448559670782, "calib/std_conf": 0.3493545336101135, "calib/step_conf_rate": 0.9296875, "calib/step_q_c": 0.7314465753424658, "calib/step_q_c_n": 146.0, "calib/step_q_gap": 0.2991766366921591, "calib/step_q_w": 0.4322699386503067, "calib/step_q_w_n": 326.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 444.640625, "completions/mean_terminated_length": 451.69842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.11093333333333333, "grad_norm": 0.005236889701336622, "kl": 0.1962127685546875, "learning_rate": 2.666666666666667e-06, "loss": 0.0415, "num_tokens": 23447558.0, "reward": 0.2982354760169983, "reward_std": 0.2744593024253845, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6419824361801147, "rewards/format_reward_step": 0.91796875, "rewards/step_l1_reward": -0.30957403779029846, "step": 104 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6850935828877006, "calib/avg_num_step_conf": 1.6875, "calib/ece": 0.23959349593495938, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.34552845528455284, "calib/gap": 0.23148128342245977, "calib/mean_conf": 0.604390243902439, "calib/mu_c": 0.7323636363636362, "calib/mu_w": 0.5008823529411764, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.19841463414634147, "calib/std_conf": 0.3640862185512724, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.726094674556213, "calib/step_q_c_n": 169.0, "calib/step_q_gap": 0.19453573919499623, "calib/step_q_w": 0.5315589353612168, "calib/step_q_w_n": 263.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2267.0, "completions/max_terminated_length": 2267.0, "completions/mean_length": 430.09375, "completions/mean_terminated_length": 436.920654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.112, "grad_norm": 0.0060654194094240665, "kl": 0.250762939453125, "learning_rate": 2.6388888888888893e-06, "loss": 0.0077, "num_tokens": 23663422.0, "reward": 0.2967536449432373, "reward_std": 0.26435887813568115, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6521124839782715, "rewards/format_reward_step": 0.9140625, "rewards/step_l1_reward": -0.32813647389411926, "step": 105 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6703801652892561, "calib/avg_num_step_conf": 1.375, "calib/ece": 0.22585772357723571, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.34146341463414637, "calib/gap": 0.20697381818181804, "calib/mean_conf": 0.626987804878049, "calib/mu_c": 0.7287919999999999, "calib/mu_w": 0.5218181818181818, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.17235772357723572, "calib/std_conf": 0.3483230964579807, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.7886063063063062, "calib/step_q_c_n": 148.0, "calib/step_q_gap": 0.19341954160042385, "calib/step_q_w": 0.5951867647058824, "calib/step_q_w_n": 204.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1294.0, "completions/max_terminated_length": 1294.0, "completions/mean_length": 405.99609375, "completions/mean_terminated_length": 412.44049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.11306666666666666, "grad_norm": 0.005529459100216627, "kl": 0.234954833984375, "learning_rate": 2.6111111111111113e-06, "loss": -0.0274, "num_tokens": 23871941.0, "reward": 0.30446913838386536, "reward_std": 0.2846444845199585, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6475687623023987, "rewards/format_reward_step": 0.91015625, "rewards/step_l1_reward": -0.3183179795742035, "step": 106 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5851772860570515, "calib/avg_num_step_conf": 1.5625, "calib/ece": 0.293224081632653, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.39183673469387753, "calib/gap": 0.11109621434284223, "calib/mean_conf": 0.6775514285714285, "calib/mu_c": 0.7324193548387098, "calib/mu_w": 0.6213231404958676, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.23232653061224487, "calib/std_conf": 0.3371138069324509, "calib/step_conf_rate": 0.91015625, "calib/step_q_c": 0.7262352941176471, "calib/step_q_c_n": 170.0, "calib/step_q_gap": 0.17114746803069047, "calib/step_q_w": 0.5550878260869566, "calib/step_q_w_n": 230.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2541.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 419.98828125, "completions/mean_terminated_length": 421.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.11413333333333334, "grad_norm": 0.005133834667503834, "kl": 0.25164794921875, "learning_rate": 2.5833333333333337e-06, "loss": 0.0226, "num_tokens": 24084074.0, "reward": 0.2446097731590271, "reward_std": 0.29522377252578735, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5793129205703735, "rewards/format_reward_step": 0.890625, "rewards/step_l1_reward": -0.3650933504104614, "step": 107 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6171189692982456, "calib/avg_num_step_conf": 1.140625, "calib/ece": 0.2592633064516129, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.4435483870967742, "calib/gap": 0.13494758771929827, "calib/mean_conf": 0.6685431451612904, "calib/mu_c": 0.7207809210526316, "calib/mu_w": 0.5858333333333333, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.90625, "calib/pce": 0.15745161290322582, "calib/std_conf": 0.35744971521127844, "calib/step_conf_rate": 0.90625, "calib/step_q_c": 0.768778488372093, "calib/step_q_c_n": 172.0, "calib/step_q_gap": 0.12694515503875958, "calib/step_q_w": 0.6418333333333334, "calib/step_q_w_n": 120.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 434.44921875, "completions/mean_terminated_length": 436.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.1152, "grad_norm": 0.004808885511010885, "kl": 0.228851318359375, "learning_rate": 2.5555555555555557e-06, "loss": 0.0153, "num_tokens": 24298525.0, "reward": 0.3085951805114746, "reward_std": 0.3162803053855896, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6369038224220276, "rewards/format_reward_step": 0.90625, "rewards/step_l1_reward": -0.3197134733200073, "step": 108 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.7463654891304348, "calib/avg_num_step_conf": 1.56640625, "calib/ece": 0.1926337448559671, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.35390946502057613, "calib/gap": 0.2971317934782609, "calib/mean_conf": 0.6113991769547326, "calib/mu_c": 0.7679130434782608, "calib/mu_w": 0.47078124999999993, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.16539094650205763, "calib/std_conf": 0.3559796329763443, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.717907100591716, "calib/step_q_c_n": 169.0, "calib/step_q_gap": 0.19427347990206079, "calib/step_q_w": 0.5236336206896552, "calib/step_q_w_n": 232.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1530.0, "completions/max_terminated_length": 1530.0, "completions/mean_length": 419.140625, "completions/mean_terminated_length": 427.49005126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.11626666666666667, "grad_norm": 0.005312109831720591, "kl": 0.236328125, "learning_rate": 2.5277777777777778e-06, "loss": -0.0321, "num_tokens": 24510425.0, "reward": 0.3231509327888489, "reward_std": 0.2573995888233185, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6752394437789917, "rewards/format_reward_step": 0.90625, "rewards/step_l1_reward": -0.30003127455711365, "step": 109 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.653310684785505, "calib/avg_num_step_conf": 1.625, "calib/ece": 0.2919429149797571, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.3724696356275304, "calib/gap": 0.17001091793232093, "calib/mean_conf": 0.6540489878542509, "calib/mu_c": 0.7497231481481482, "calib/mu_w": 0.5797122302158273, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.25437246963562754, "calib/std_conf": 0.34342655702467995, "calib/step_conf_rate": 0.9296875, "calib/step_q_c": 0.7162337662337663, "calib/step_q_c_n": 154.0, "calib/step_q_gap": 0.1783955982948351, "calib/step_q_w": 0.5378381679389312, "calib/step_q_w_n": 262.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2917.0, "completions/max_terminated_length": 2917.0, "completions/mean_length": 429.60546875, "completions/mean_terminated_length": 436.42462158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.11733333333333333, "grad_norm": 0.004588097333908081, "kl": 0.236175537109375, "learning_rate": 2.5e-06, "loss": -0.0414, "num_tokens": 24725324.0, "reward": 0.2684740722179413, "reward_std": 0.2740425765514374, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6149437427520752, "rewards/format_reward_step": 0.91015625, "rewards/step_l1_reward": -0.3444017767906189, "step": 110 }, { "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.6699943757030371, "calib/avg_num_step_conf": 1.4921875, "calib/ece": 0.26048814504881457, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.4560669456066946, "calib/gap": 0.1721283277090362, "calib/mean_conf": 0.6770013947001395, "calib/mu_c": 0.7576640419947505, "calib/mu_w": 0.5855357142857143, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.94921875, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.20305439330543942, "calib/std_conf": 0.34518280147230895, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.7737504873294346, "calib/step_q_c_n": 171.0, "calib/step_q_gap": 0.23127465794554825, "calib/step_q_w": 0.5424758293838864, "calib/step_q_w_n": 211.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2612.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 450.96484375, "completions/mean_terminated_length": 454.5157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.1184, "grad_norm": 0.005634634755551815, "kl": 0.24365234375, "learning_rate": 2.4722222222222226e-06, "loss": 0.0245, "num_tokens": 24948179.0, "reward": 0.3159874975681305, "reward_std": 0.2590514123439789, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6415572762489319, "rewards/format_reward_step": 0.91796875, "rewards/step_l1_reward": -0.292394757270813, "step": 111 }, { "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.7191492450638793, "calib/avg_num_step_conf": 1.4609375, "calib/ece": 0.18625531914893617, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.87109375, "calib/frac_conf_gt_0.9": 0.3404255319148936, "calib/gap": 0.29017857142857145, "calib/mean_conf": 0.5682978723404256, "calib/mu_c": 0.7201785714285714, "calib/mu_w": 0.43, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 0.94140625, "calib/nonempty_step_conf_rate": 0.8984375, "calib/pce": 0.13897872340425535, "calib/std_conf": 0.3726427708628253, "calib/step_conf_rate": 0.8984375, "calib/step_q_c": 0.686558024691358, "calib/step_q_c_n": 162.0, "calib/step_q_gap": 0.23490708129513155, "calib/step_q_w": 0.4516509433962264, "calib/step_q_w_n": 212.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2437.0, "completions/max_terminated_length": 2437.0, "completions/mean_length": 464.984375, "completions/mean_terminated_length": 474.2470397949219, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.11946666666666667, "grad_norm": 0.005269154440611601, "kl": 0.239166259765625, "learning_rate": 2.4444444444444447e-06, "loss": 0.0021, "num_tokens": 25175135.0, "reward": 0.31370168924331665, "reward_std": 0.27312415838241577, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6559332013130188, "rewards/format_reward_step": 0.87109375, "rewards/step_l1_reward": -0.2902485728263855, "step": 112 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7051602564102564, "calib/avg_num_step_conf": 1.6328125, "calib/ece": 0.24623999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.376, "calib/gap": 0.23257692307692324, "calib/mean_conf": 0.64656, "calib/mu_c": 0.7675000000000001, "calib/mu_w": 0.5349230769230768, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.20639999999999997, "calib/std_conf": 0.3435057006804982, "calib/step_conf_rate": 0.9375, "calib/step_q_c": 0.766530612244898, "calib/step_q_c_n": 147.0, "calib/step_q_gap": 0.2937234781735574, "calib/step_q_w": 0.4728071340713407, "calib/step_q_w_n": 271.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2803.0, "completions/max_terminated_length": 2803.0, "completions/mean_length": 386.9375, "completions/mean_terminated_length": 389.9842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.12053333333333334, "grad_norm": 0.006095048971474171, "kl": 0.2994384765625, "learning_rate": 2.4166666666666667e-06, "loss": 0.0163, "num_tokens": 25379391.0, "reward": 0.30809134244918823, "reward_std": 0.28997525572776794, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6653277277946472, "rewards/format_reward_step": 0.92578125, "rewards/step_l1_reward": -0.32805129885673523, "step": 113 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6874215590573142, "calib/avg_num_step_conf": 1.52734375, "calib/ece": 0.1843621399176955, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.3292181069958848, "calib/gap": 0.23749686236229256, "calib/mean_conf": 0.6245267489711934, "calib/mu_c": 0.7232394366197183, "calib/mu_w": 0.4857425742574258, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.11226337448559671, "calib/std_conf": 0.3492250125159953, "calib/step_conf_rate": 0.9140625, "calib/step_q_c": 0.7372666666666667, "calib/step_q_c_n": 200.0, "calib/step_q_gap": 0.26213577661431064, "calib/step_q_w": 0.4751308900523561, "calib/step_q_w_n": 191.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2904.0, "completions/max_terminated_length": 2904.0, "completions/mean_length": 397.74609375, "completions/mean_terminated_length": 400.8779602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.1216, "grad_norm": 0.005061530973762274, "kl": 0.26641845703125, "learning_rate": 2.388888888888889e-06, "loss": 0.0279, "num_tokens": 25586238.0, "reward": 0.3437195420265198, "reward_std": 0.26500290632247925, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.678789496421814, "rewards/format_reward_step": 0.91015625, "rewards/step_l1_reward": -0.28431904315948486, "step": 114 }, { "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.6103429010405755, "calib/avg_num_step_conf": 1.53515625, "calib/ece": 0.29062499999999997, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.2875, "calib/gap": 0.1263461135554158, "calib/mean_conf": 0.5681250000000001, "calib/mu_c": 0.636036036036036, "calib/mu_w": 0.5096899224806202, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.198125, "calib/std_conf": 0.3588108615621885, "calib/step_conf_rate": 0.921875, "calib/step_q_c": 0.6452280701754386, "calib/step_q_c_n": 171.0, "calib/step_q_gap": 0.09575014224751077, "calib/step_q_w": 0.5494779279279278, "calib/step_q_w_n": 222.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3008.0, "completions/max_terminated_length": 3008.0, "completions/mean_length": 402.4609375, "completions/mean_terminated_length": 405.6299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.12266666666666666, "grad_norm": 0.00514686293900013, "kl": 0.2657623291015625, "learning_rate": 2.361111111111111e-06, "loss": -0.0078, "num_tokens": 25794532.0, "reward": 0.2529529333114624, "reward_std": 0.2864396572113037, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5985375046730042, "rewards/format_reward_step": 0.8984375, "rewards/step_l1_reward": -0.3598191440105438, "step": 115 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6419, "calib/avg_num_step_conf": 1.75, "calib/ece": 0.2500408163265306, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.3183673469387755, "calib/gap": 0.17293999999999998, "calib/mean_conf": 0.587265306122449, "calib/mu_c": 0.6755, "calib/mu_w": 0.50256, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.1737551020408163, "calib/std_conf": 0.3643437923176632, "calib/step_conf_rate": 0.91015625, "calib/step_q_c": 0.6730946502057613, "calib/step_q_c_n": 162.0, "calib/step_q_gap": 0.26321745722330525, "calib/step_q_w": 0.40987719298245606, "calib/step_q_w_n": 285.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1813.0, "completions/max_terminated_length": 1813.0, "completions/mean_length": 438.4296875, "completions/mean_terminated_length": 440.1490478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.12373333333333333, "grad_norm": 0.005122186616063118, "kl": 0.2557373046875, "learning_rate": 2.3333333333333336e-06, "loss": -0.0139, "num_tokens": 26011290.0, "reward": 0.27786949276924133, "reward_std": 0.28764808177948, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.611339807510376, "rewards/format_reward_step": 0.89453125, "rewards/step_l1_reward": -0.32825711369514465, "step": 116 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.6506338028169015, "calib/avg_num_step_conf": 1.6171875, "calib/ece": 0.291900826446281, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.359504132231405, "calib/gap": 0.15911830985915476, "calib/mean_conf": 0.6360330578512396, "calib/mu_c": 0.7293999999999998, "calib/mu_w": 0.5702816901408451, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.25735537190082647, "calib/std_conf": 0.3401676284364873, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.6961691176470588, "calib/step_q_c_n": 136.0, "calib/step_q_gap": 0.16289573635209476, "calib/step_q_w": 0.533273381294964, "calib/step_q_w_n": 278.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1682.0, "completions/max_terminated_length": 1682.0, "completions/mean_length": 402.79296875, "completions/mean_terminated_length": 407.5691833496094, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.1248, "grad_norm": 0.005389553029090166, "kl": 0.259979248046875, "learning_rate": 2.305555555555556e-06, "loss": -0.0049, "num_tokens": 26221005.0, "reward": 0.2536861300468445, "reward_std": 0.30577370524406433, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.6082687377929688, "rewards/format_reward_step": 0.9140625, "rewards/step_l1_reward": -0.36183398962020874, "step": 117 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6660320378151261, "calib/avg_num_step_conf": 2.43359375, "calib/ece": 0.2161943319838056, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.2793522267206478, "calib/gap": 0.18538602941176457, "calib/mean_conf": 0.577246963562753, "calib/mu_c": 0.6665625, "calib/mu_w": 0.48117647058823537, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.13761133603238862, "calib/std_conf": 0.3518414711064845, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.6069601769911505, "calib/step_q_c_n": 226.0, "calib/step_q_gap": 0.2586912433219649, "calib/step_q_w": 0.34826893366918554, "calib/step_q_w_n": 397.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2749.0, "completions/max_terminated_length": 2749.0, "completions/mean_length": 407.22265625, "completions/mean_terminated_length": 413.6865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.12586666666666665, "grad_norm": 0.005140895489603281, "kl": 0.269775390625, "learning_rate": 2.277777777777778e-06, "loss": -0.0047, "num_tokens": 26429262.0, "reward": 0.28935784101486206, "reward_std": 0.2676890194416046, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6488054990768433, "rewards/format_reward_step": 0.9140625, "rewards/step_l1_reward": -0.35290229320526123, "step": 118 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.6801426452934116, "calib/avg_num_step_conf": 1.515625, "calib/ece": 0.22511204481792713, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.3025210084033613, "calib/gap": 0.21985994397759107, "calib/mean_conf": 0.5703501400560225, "calib/mu_c": 0.680280112044818, "calib/mu_w": 0.4604201680672269, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.14773109243697474, "calib/std_conf": 0.35998472150569677, "calib/step_conf_rate": 0.91015625, "calib/step_q_c": 0.6668528735632184, "calib/step_q_c_n": 174.0, "calib/step_q_gap": 0.204742125899667, "calib/step_q_w": 0.4621107476635514, "calib/step_q_w_n": 214.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1673.0, "completions/max_terminated_length": 1673.0, "completions/mean_length": 443.99609375, "completions/mean_terminated_length": 447.49212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 42.0, "epoch": 0.12693333333333334, "grad_norm": 0.0047919717617332935, "kl": 0.262969970703125, "learning_rate": 2.25e-06, "loss": 0.0135, "num_tokens": 26647989.0, "reward": 0.30583804845809937, "reward_std": 0.27502328157424927, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6427181959152222, "rewards/format_reward_step": 0.89453125, "rewards/step_l1_reward": -0.3036983609199524, "step": 119 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6918258160639307, "calib/avg_num_step_conf": 1.45703125, "calib/ece": 0.1816734693877551, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.2938775510204082, "calib/gap": 0.21975145604767699, "calib/mean_conf": 0.5871428571428571, "calib/mu_c": 0.6831159420289854, "calib/mu_w": 0.4633644859813084, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.10277551020408163, "calib/std_conf": 0.34212302358260854, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.649336569579288, "calib/step_q_c_n": 206.0, "calib/step_q_gap": 0.11979565141561532, "calib/step_q_w": 0.5295409181636727, "calib/step_q_w_n": 167.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1621.0, "completions/max_terminated_length": 1621.0, "completions/mean_length": 395.3359375, "completions/mean_terminated_length": 398.4488220214844, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.128, "grad_norm": 0.0057893842458724976, "kl": 0.268463134765625, "learning_rate": 2.222222222222222e-06, "loss": -0.0043, "num_tokens": 26855883.0, "reward": 0.3432433009147644, "reward_std": 0.26072174310684204, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.698214054107666, "rewards/format_reward_step": 0.9375, "rewards/step_l1_reward": -0.3070399761199951, "step": 120 }, { "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5701693981545242, "calib/avg_num_step_conf": 1.8203125, "calib/ece": 0.26732510288065847, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.25102880658436216, "calib/gap": 0.09005853188266078, "calib/mean_conf": 0.5504526748971194, "calib/mu_c": 0.6012264150943396, "calib/mu_w": 0.5111678832116788, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.19078189300411524, "calib/std_conf": 0.3515295807101361, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.6165358024691358, "calib/step_q_c_n": 162.0, "calib/step_q_gap": 0.20650257878492517, "calib/step_q_w": 0.4100332236842106, "calib/step_q_w_n": 304.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2897.0, "completions/max_terminated_length": 2897.0, "completions/mean_length": 464.125, "completions/mean_terminated_length": 475.2640075683594, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.12906666666666666, "grad_norm": 0.005195781122893095, "kl": 0.25836181640625, "learning_rate": 2.1944444444444445e-06, "loss": 0.0109, "num_tokens": 27079755.0, "reward": 0.25892913341522217, "reward_std": 0.3152400851249695, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6206378936767578, "rewards/format_reward_step": 0.93359375, "rewards/step_l1_reward": -0.37231090664863586, "step": 121 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6745995273109243, "calib/avg_num_step_conf": 1.6640625, "calib/ece": 0.22302699055330633, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.2793522267206478, "calib/gap": 0.20782245710784325, "calib/mean_conf": 0.5758151147098516, "calib/mu_c": 0.6759401041666667, "calib/mu_w": 0.46811764705882347, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.14031174089068826, "calib/std_conf": 0.3581826466658249, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.69139896373057, "calib/step_q_c_n": 193.0, "calib/step_q_gap": 0.1968727834730592, "calib/step_q_w": 0.4945261802575108, "calib/step_q_w_n": 233.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1538.0, "completions/max_terminated_length": 1538.0, "completions/mean_length": 381.82421875, "completions/mean_terminated_length": 386.351806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.13013333333333332, "grad_norm": 0.005926921498030424, "kl": 0.29742431640625, "learning_rate": 2.166666666666667e-06, "loss": -0.0226, "num_tokens": 27284846.0, "reward": 0.3194349706172943, "reward_std": 0.2801957130432129, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6791479587554932, "rewards/format_reward_step": 0.9375, "rewards/step_l1_reward": -0.3285592198371887, "step": 122 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.6145352900069883, "calib/avg_num_step_conf": 1.8671875, "calib/ece": 0.25282157676348554, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.2157676348547718, "calib/gap": 0.13559678546470993, "calib/mean_conf": 0.5052697095435685, "calib/mu_c": 0.5812264150943396, "calib/mu_w": 0.44562962962962965, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.15912863070539424, "calib/std_conf": 0.3547578653453605, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.5037560975609756, "calib/step_q_c_n": 205.0, "calib/step_q_gap": 0.04045939426427231, "calib/step_q_w": 0.4632967032967033, "calib/step_q_w_n": 273.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1970.0, "completions/max_terminated_length": 1970.0, "completions/mean_length": 478.703125, "completions/mean_terminated_length": 480.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.1312, "grad_norm": 0.004890909418463707, "kl": 0.2481689453125, "learning_rate": 2.138888888888889e-06, "loss": -0.0048, "num_tokens": 27512682.0, "reward": 0.2750769853591919, "reward_std": 0.25875532627105713, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6226316690444946, "rewards/format_reward_step": 0.89453125, "rewards/step_l1_reward": -0.3349777162075043, "step": 123 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6332996292551397, "calib/avg_num_step_conf": 1.59765625, "calib/ece": 0.23532786885245904, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.29508196721311475, "calib/gap": 0.1512854735422986, "calib/mean_conf": 0.5888524590163935, "calib/mu_c": 0.66015503875969, "calib/mu_w": 0.5088695652173914, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.14774590163934428, "calib/std_conf": 0.3425404049909007, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.6702487562189056, "calib/step_q_c_n": 201.0, "calib/step_q_gap": 0.14543625621890555, "calib/step_q_w": 0.5248125, "calib/step_q_w_n": 208.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2125.0, "completions/max_terminated_length": 2125.0, "completions/mean_length": 419.734375, "completions/mean_terminated_length": 423.03936767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.13226666666666667, "grad_norm": 0.005201234016567469, "kl": 0.279571533203125, "learning_rate": 2.1111111111111114e-06, "loss": 0.0035, "num_tokens": 27726950.0, "reward": 0.3044394850730896, "reward_std": 0.2931825518608093, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6657023429870605, "rewards/format_reward_step": 0.93359375, "rewards/step_l1_reward": -0.3443233370780945, "step": 124 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.6598881900768693, "calib/avg_num_step_conf": 1.828125, "calib/ece": 0.21829875518672198, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.23651452282157676, "calib/gap": 0.19087002096436073, "calib/mean_conf": 0.5350622406639005, "calib/mu_c": 0.6419811320754718, "calib/mu_w": 0.4511111111111111, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.15676348547717844, "calib/std_conf": 0.3517426920879851, "calib/step_conf_rate": 0.9140625, "calib/step_q_c": 0.5855860215053763, "calib/step_q_c_n": 186.0, "calib/step_q_gap": 0.10646793639899332, "calib/step_q_w": 0.479118085106383, "calib/step_q_w_n": 282.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1630.0, "completions/max_terminated_length": 1630.0, "completions/mean_length": 439.90625, "completions/mean_terminated_length": 445.12255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.13333333333333333, "grad_norm": 0.00522991269826889, "kl": 0.262451171875, "learning_rate": 2.0833333333333334e-06, "loss": 0.0011, "num_tokens": 27944374.0, "reward": 0.26843976974487305, "reward_std": 0.2542046904563904, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6382890939712524, "rewards/format_reward_step": 0.89453125, "rewards/step_l1_reward": -0.3639094829559326, "step": 125 }, { "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.6901683161164487, "calib/avg_num_step_conf": 2.25390625, "calib/ece": 0.2394915254237288, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.3135593220338983, "calib/gap": 0.2394957740374195, "calib/mean_conf": 0.5553389830508475, "calib/mu_c": 0.6842201834862384, "calib/mu_w": 0.44472440944881886, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.16648305084745763, "calib/std_conf": 0.3718521717468706, "calib/step_conf_rate": 0.9375, "calib/step_q_c": 0.6838181818181818, "calib/step_q_c_n": 165.0, "calib/step_q_gap": 0.3663399455722271, "calib/step_q_w": 0.31747823624595467, "calib/step_q_w_n": 412.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 466.31640625, "completions/mean_terminated_length": 477.5080261230469, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.1344, "grad_norm": 0.004744419362396002, "kl": 0.2213134765625, "learning_rate": 2.0555555555555555e-06, "loss": -0.0029, "num_tokens": 28169215.0, "reward": 0.2959335148334503, "reward_std": 0.26156705617904663, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6451636552810669, "rewards/format_reward_step": 0.89453125, "rewards/step_l1_reward": -0.31735917925834656, "step": 126 }, { "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.668756967670011, "calib/avg_num_step_conf": 2.25390625, "calib/ece": 0.23289214876033065, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.2727272727272727, "calib/gap": 0.20829358974358975, "calib/mean_conf": 0.5742979338842975, "calib/mu_c": 0.693076923076923, "calib/mu_w": 0.4847833333333333, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.18871900826446286, "calib/std_conf": 0.35634054730957604, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.574974358974359, "calib/step_q_c_n": 195.0, "calib/step_q_gap": 0.22295053698483025, "calib/step_q_w": 0.35202382198952875, "calib/step_q_w_n": 382.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 427.49609375, "completions/mean_terminated_length": 437.7560119628906, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.13546666666666668, "grad_norm": 0.00546091515570879, "kl": 0.27142333984375, "learning_rate": 2.027777777777778e-06, "loss": 0.0057, "num_tokens": 28382326.0, "reward": 0.2984740138053894, "reward_std": 0.2718273997306824, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6555015444755554, "rewards/format_reward_step": 0.91796875, "rewards/step_l1_reward": -0.324178546667099, "step": 127 }, { "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.584051724137931, "calib/avg_num_step_conf": 1.6640625, "calib/ece": 0.24991666666666662, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.2833333333333333, "calib/gap": 0.12667408231368193, "calib/mean_conf": 0.5155, "calib/mu_c": 0.580948275862069, "calib/mu_w": 0.45427419354838705, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.14104166666666668, "calib/std_conf": 0.37323774818025396, "calib/step_conf_rate": 0.91796875, "calib/step_q_c": 0.6690751445086706, "calib/step_q_c_n": 173.0, "calib/step_q_gap": 0.18159135004226734, "calib/step_q_w": 0.4874837944664032, "calib/step_q_w_n": 253.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2049.0, "completions/max_terminated_length": 2049.0, "completions/mean_length": 419.8671875, "completions/mean_terminated_length": 428.2310791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.13653333333333334, "grad_norm": 0.005378535017371178, "kl": 0.268280029296875, "learning_rate": 2.0000000000000003e-06, "loss": 0.0027, "num_tokens": 28596476.0, "reward": 0.2697708010673523, "reward_std": 0.2821349501609802, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6162769794464111, "rewards/format_reward_step": 0.91015625, "rewards/step_l1_reward": -0.3493916392326355, "step": 128 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.610390866873065, "calib/avg_num_step_conf": 1.91796875, "calib/ece": 0.24255600000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.308, "calib/gap": 0.11783578431372543, "calib/mean_conf": 0.597436, "calib/mu_c": 0.6511691176470588, "calib/mu_w": 0.5333333333333333, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.14799600000000002, "calib/std_conf": 0.3353037874883014, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.643483193277311, "calib/step_q_c_n": 238.0, "calib/step_q_gap": 0.07872128851540627, "calib/step_q_w": 0.5647619047619047, "calib/step_q_w_n": 252.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2702.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 391.97265625, "completions/mean_terminated_length": 393.50982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.1376, "grad_norm": 0.005064961966127157, "kl": 0.27825927734375, "learning_rate": 1.9722222222222224e-06, "loss": 0.0205, "num_tokens": 28799205.0, "reward": 0.31468018889427185, "reward_std": 0.26014626026153564, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6678237915039062, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.336119681596756, "step": 129 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6901665989435188, "calib/avg_num_step_conf": 1.55859375, "calib/ece": 0.17555102040816326, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.2530612244897959, "calib/gap": 0.24424488690234325, "calib/mean_conf": 0.5473877551020409, "calib/mu_c": 0.6540579710144928, "calib/mu_w": 0.40981308411214956, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.07983673469387752, "calib/std_conf": 0.3549701232801023, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.6881642512077294, "calib/step_q_c_n": 207.0, "calib/step_q_gap": 0.23933091787439603, "calib/step_q_w": 0.44883333333333336, "calib/step_q_w_n": 192.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2639.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 400.79296875, "completions/mean_terminated_length": 403.9488220214844, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.13866666666666666, "grad_norm": 0.00564991869032383, "kl": 0.272186279296875, "learning_rate": 1.944444444444445e-06, "loss": 0.0351, "num_tokens": 29007096.0, "reward": 0.33700209856033325, "reward_std": 0.25782716274261475, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6895445585250854, "rewards/format_reward_step": 0.9296875, "rewards/step_l1_reward": -0.30929034948349, "step": 130 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7038740245261984, "calib/avg_num_step_conf": 2.40625, "calib/ece": 0.1713616935483871, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.16129032258064516, "calib/gap": 0.23632934782608694, "calib/mean_conf": 0.48767056451612906, "calib/mu_c": 0.636329347826087, "calib/mu_w": 0.4, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.14403225806451614, "calib/std_conf": 0.3277529285265745, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.5875722532588454, "calib/step_q_c_n": 179.0, "calib/step_q_gap": 0.1450061205357333, "calib/step_q_w": 0.4425661327231121, "calib/step_q_w_n": 437.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 394.24609375, "completions/mean_terminated_length": 398.92095947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.13973333333333332, "grad_norm": 0.0058201816864311695, "kl": 0.248626708984375, "learning_rate": 1.916666666666667e-06, "loss": 0.0142, "num_tokens": 29214231.0, "reward": 0.3132360577583313, "reward_std": 0.2416331171989441, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.717965841293335, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.353993684053421, "step": 131 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.671569327431101, "calib/avg_num_step_conf": 1.7734375, "calib/ece": 0.22041152263374486, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.3950617283950617, "calib/gap": 0.2276802798800513, "calib/mean_conf": 0.5984362139917696, "calib/mu_c": 0.6865100671140939, "calib/mu_w": 0.4588297872340426, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.10283950617283948, "calib/std_conf": 0.38021656034767987, "calib/step_conf_rate": 0.9296875, "calib/step_q_c": 0.67275956284153, "calib/step_q_c_n": 244.0, "calib/step_q_gap": 0.1910928961748634, "calib/step_q_w": 0.48166666666666663, "calib/step_q_w_n": 210.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 399.59765625, "completions/mean_terminated_length": 405.94049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.1408, "grad_norm": 0.0052103460766375065, "kl": 0.26055908203125, "learning_rate": 1.888888888888889e-06, "loss": 0.0196, "num_tokens": 29422120.0, "reward": 0.33788812160491943, "reward_std": 0.274682879447937, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6694461107254028, "rewards/format_reward_step": 0.91015625, "rewards/step_l1_reward": -0.29210734367370605, "step": 132 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6606093189964157, "calib/avg_num_step_conf": 1.984375, "calib/ece": 0.2209726530612245, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.2, "calib/gap": 0.1914083870967742, "calib/mean_conf": 0.4929048979591837, "calib/mu_c": 0.614, "calib/mu_w": 0.4225916129032258, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.17326530612244895, "calib/std_conf": 0.338006179596465, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.5865408805031448, "calib/step_q_c_n": 159.0, "calib/step_q_gap": 0.12561910399884685, "calib/step_q_w": 0.46092177650429794, "calib/step_q_w_n": 349.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 501.3359375, "completions/mean_terminated_length": 507.2806396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.14186666666666667, "grad_norm": 0.0043398430570960045, "kl": 0.2178497314453125, "learning_rate": 1.8611111111111113e-06, "loss": -0.0081, "num_tokens": 29656806.0, "reward": 0.28701332211494446, "reward_std": 0.26014071702957153, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6839252710342407, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.3684923052787781, "step": 133 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6431696162258898, "calib/avg_num_step_conf": 2.0703125, "calib/ece": 0.22766129032258062, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.23387096774193547, "calib/gap": 0.17175979319944323, "calib/mean_conf": 0.513467741935484, "calib/mu_c": 0.6111214953271028, "calib/mu_w": 0.43936170212765957, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.15483870967741933, "calib/std_conf": 0.3590434072842112, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.6530263157894737, "calib/step_q_c_n": 152.0, "calib/step_q_gap": 0.2766042699340945, "calib/step_q_w": 0.3764220458553792, "calib/step_q_w_n": 378.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2827.0, "completions/max_terminated_length": 2827.0, "completions/mean_length": 461.46875, "completions/mean_terminated_length": 463.2784729003906, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.14293333333333333, "grad_norm": 0.00501696951687336, "kl": 0.22442626953125, "learning_rate": 1.8333333333333333e-06, "loss": 0.0306, "num_tokens": 29883894.0, "reward": 0.28798556327819824, "reward_std": 0.2733740508556366, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6685503721237183, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.36679795384407043, "step": 134 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6055826889160223, "calib/avg_num_step_conf": 2.04296875, "calib/ece": 0.2603703703703704, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.23045267489711935, "calib/gap": 0.13574481074481087, "calib/mean_conf": 0.5422633744855968, "calib/mu_c": 0.6126495726495728, "calib/mu_w": 0.4769047619047619, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.16057613168724283, "calib/std_conf": 0.3544975877031863, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.6573026737967915, "calib/step_q_c_n": 187.0, "calib/step_q_gap": 0.1926002928444106, "calib/step_q_w": 0.4647023809523809, "calib/step_q_w_n": 336.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 422.26953125, "completions/mean_terminated_length": 432.4040222167969, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.144, "grad_norm": 0.005108509678393602, "kl": 0.24139404296875, "learning_rate": 1.8055555555555557e-06, "loss": -0.0296, "num_tokens": 30097875.0, "reward": 0.2935692071914673, "reward_std": 0.26770222187042236, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.650739848613739, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.34485143423080444, "step": 135 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7425729442970821, "calib/avg_num_step_conf": 2.28125, "calib/ece": 0.16321285140562247, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.23293172690763053, "calib/gap": 0.2934025198938993, "calib/mean_conf": 0.5302008032128516, "calib/mu_c": 0.7010576923076924, "calib/mu_w": 0.4076551724137931, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.1378714859437751, "calib/std_conf": 0.35086634006945205, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.6574111675126904, "calib/step_q_c_n": 197.0, "calib/step_q_gap": 0.22448265760743624, "calib/step_q_w": 0.4329285099052541, "calib/step_q_w_n": 387.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2609.0, "completions/max_terminated_length": 2609.0, "completions/mean_length": 461.68359375, "completions/mean_terminated_length": 463.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.14506666666666668, "grad_norm": 0.005101132206618786, "kl": 0.210723876953125, "learning_rate": 1.777777777777778e-06, "loss": 0.0492, "num_tokens": 30324554.0, "reward": 0.337179958820343, "reward_std": 0.23052147030830383, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.7274148464202881, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.32571113109588623, "step": 136 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6756081525312295, "calib/avg_num_step_conf": 2.140625, "calib/ece": 0.20946194331983808, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.23481781376518218, "calib/gap": 0.20794786324786324, "calib/mean_conf": 0.522117004048583, "calib/mu_c": 0.6315632478632478, "calib/mu_w": 0.42361538461538456, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.12894736842105267, "calib/std_conf": 0.3421008175180087, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.6059450704225353, "calib/step_q_c_n": 213.0, "calib/step_q_gap": 0.12614208534790844, "calib/step_q_w": 0.4798029850746269, "calib/step_q_w_n": 335.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2991.0, "completions/max_terminated_length": 2991.0, "completions/mean_length": 442.06640625, "completions/mean_terminated_length": 445.5472412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.14613333333333334, "grad_norm": 0.004609386902302504, "kl": 0.2218017578125, "learning_rate": 1.75e-06, "loss": 0.0563, "num_tokens": 30544707.0, "reward": 0.3316650390625, "reward_std": 0.24586954712867737, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6928571462631226, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.30921459197998047, "step": 137 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6779999999999999, "calib/avg_num_step_conf": 1.72265625, "calib/ece": 0.2064216326530612, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.2653061224489796, "calib/gap": 0.20887975438596507, "calib/mean_conf": 0.5405171428571428, "calib/mu_c": 0.6215113333333335, "calib/mu_w": 0.41263157894736846, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.0673469387755102, "calib/std_conf": 0.34302588328591416, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.623115294117647, "calib/step_q_c_n": 255.0, "calib/step_q_gap": 0.21311529411764701, "calib/step_q_w": 0.41, "calib/step_q_w_n": 186.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2834.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 421.92578125, "completions/mean_terminated_length": 430.3306884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.1472, "grad_norm": 0.0051317536272108555, "kl": 0.2580108642578125, "learning_rate": 1.7222222222222224e-06, "loss": -0.0022, "num_tokens": 30757056.0, "reward": 0.33365753293037415, "reward_std": 0.2487613558769226, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6757357120513916, "rewards/format_reward_step": 0.921875, "rewards/step_l1_reward": -0.30998310446739197, "step": 138 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6721259842519686, "calib/avg_num_step_conf": 1.8359375, "calib/ece": 0.21853174603174597, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.29365079365079366, "calib/gap": 0.2081417322834645, "calib/mean_conf": 0.5928968253968254, "calib/mu_c": 0.6961417322834644, "calib/mu_w": 0.48799999999999993, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.1537301587301587, "calib/std_conf": 0.35177207468701155, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.6860891122278059, "calib/step_q_c_n": 199.0, "calib/step_q_gap": 0.1736145734824185, "calib/step_q_w": 0.5124745387453874, "calib/step_q_w_n": 271.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1136.0, "completions/max_terminated_length": 1136.0, "completions/mean_length": 369.76171875, "completions/mean_terminated_length": 374.1462707519531, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.14826666666666666, "grad_norm": 0.005164094269275665, "kl": 0.253387451171875, "learning_rate": 1.6944444444444446e-06, "loss": 0.0042, "num_tokens": 30954811.0, "reward": 0.3282237648963928, "reward_std": 0.2631811499595642, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.7040168046951294, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.3413192629814148, "step": 139 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7424364934977428, "calib/avg_num_step_conf": 2.0859375, "calib/ece": 0.16077279999999988, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.324, "calib/gap": 0.2902782561821979, "calib/mean_conf": 0.5875471999999999, "calib/mu_c": 0.7001751633986928, "calib/mu_w": 0.4098969072164949, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.06815999999999989, "calib/std_conf": 0.3568073439997557, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.6992158914728681, "calib/step_q_c_n": 258.0, "calib/step_q_gap": 0.25446951466127393, "calib/step_q_w": 0.4447463768115942, "calib/step_q_w_n": 276.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2684.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 400.9375, "completions/mean_terminated_length": 404.094482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.14933333333333335, "grad_norm": 0.004881748929619789, "kl": 0.25836181640625, "learning_rate": 1.6666666666666667e-06, "loss": 0.0058, "num_tokens": 31162467.0, "reward": 0.3957335948944092, "reward_std": 0.2444041669368744, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.7389832139015198, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.25845348834991455, "step": 140 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6920915295062224, "calib/avg_num_step_conf": 1.95703125, "calib/ece": 0.18542510121457484, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.31983805668016196, "calib/gap": 0.2266372273517998, "calib/mean_conf": 0.5969230769230769, "calib/mu_c": 0.6941843971631205, "calib/mu_w": 0.4675471698113207, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.10574898785425096, "calib/std_conf": 0.3524584653421482, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.6735418326693228, "calib/step_q_c_n": 251.0, "calib/step_q_gap": 0.28961543266932277, "calib/step_q_w": 0.3839264, "calib/step_q_w_n": 250.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2182.0, "completions/max_terminated_length": 2182.0, "completions/mean_length": 438.5625, "completions/mean_terminated_length": 443.76287841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.1504, "grad_norm": 0.005510457791388035, "kl": 0.240234375, "learning_rate": 1.638888888888889e-06, "loss": -0.003, "num_tokens": 31381835.0, "reward": 0.35618430376052856, "reward_std": 0.28215062618255615, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6942168474197388, "rewards/format_reward_step": 0.9375, "rewards/step_l1_reward": -0.2795044183731079, "step": 141 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6722083413415337, "calib/avg_num_step_conf": 2.23828125, "calib/ece": 0.18711999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.232, "calib/gap": 0.20345057338714828, "calib/mean_conf": 0.52816, "calib/mu_c": 0.6331404958677684, "calib/mu_w": 0.42968992248062016, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.11563999999999997, "calib/std_conf": 0.3454501619626194, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.6281516587677726, "calib/step_q_c_n": 211.0, "calib/step_q_gap": 0.1549008300384908, "calib/step_q_w": 0.47325082872928176, "calib/step_q_w_n": 362.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1431.0, "completions/max_terminated_length": 1431.0, "completions/mean_length": 427.38671875, "completions/mean_terminated_length": 432.4545593261719, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.15146666666666667, "grad_norm": 0.00481997337192297, "kl": 0.229736328125, "learning_rate": 1.6111111111111113e-06, "loss": -0.0145, "num_tokens": 31596406.0, "reward": 0.31411874294281006, "reward_std": 0.25212931632995605, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6740629076957703, "rewards/format_reward_step": 0.92578125, "rewards/step_l1_reward": -0.3255128860473633, "step": 142 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.7154727224576272, "calib/avg_num_step_conf": 2.26953125, "calib/ece": 0.21825203252032527, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.2764227642276423, "calib/gap": 0.2252635063559324, "calib/mean_conf": 0.6151626016260164, "calib/mu_c": 0.7323728813559324, "calib/mu_w": 0.507109375, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.17686991869918706, "calib/std_conf": 0.32688567092088044, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.6581127450980392, "calib/step_q_c_n": 204.0, "calib/step_q_gap": 0.22476703333853437, "calib/step_q_w": 0.4333457117595048, "calib/step_q_w_n": 377.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2672.0, "completions/max_terminated_length": 2672.0, "completions/mean_length": 445.8125, "completions/mean_terminated_length": 449.3228454589844, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.15253333333333333, "grad_norm": 0.00506249163299799, "kl": 0.212554931640625, "learning_rate": 1.5833333333333333e-06, "loss": 0.0262, "num_tokens": 31817870.0, "reward": 0.32942867279052734, "reward_std": 0.2550898492336273, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.7016031742095947, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.3247770071029663, "step": 143 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.685218253968254, "calib/avg_num_step_conf": 2.16796875, "calib/ece": 0.17514056224899593, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.3172690763052209, "calib/gap": 0.2356011904761905, "calib/mean_conf": 0.5937751004016064, "calib/mu_c": 0.693125, "calib/mu_w": 0.4575238095238095, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.09530120481927706, "calib/std_conf": 0.34731472137706876, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.65, "calib/step_q_c_n": 272.0, "calib/step_q_gap": 0.2069515901060071, "calib/step_q_w": 0.4430484098939929, "calib/step_q_w_n": 283.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 405.92578125, "completions/mean_terminated_length": 412.36907958984375, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.1536, "grad_norm": 0.0052193524315953255, "kl": 0.250244140625, "learning_rate": 1.5555555555555558e-06, "loss": -0.0024, "num_tokens": 32025915.0, "reward": 0.36195603013038635, "reward_std": 0.24744543433189392, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7256039381027222, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.30794191360473633, "step": 144 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6365524402907581, "calib/avg_num_step_conf": 2.0546875, "calib/ece": 0.2133864541832669, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.2788844621513944, "calib/gap": 0.15247988058151618, "calib/mean_conf": 0.5821513944223108, "calib/mu_c": 0.6471527777777778, "calib/mu_w": 0.4946728971962616, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.11091633466135459, "calib/std_conf": 0.33655412384183914, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.630611880046136, "calib/step_q_c_n": 289.0, "calib/step_q_gap": 0.12860597287314024, "calib/step_q_w": 0.5020059071729958, "calib/step_q_w_n": 237.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2292.0, "completions/max_terminated_length": 2292.0, "completions/mean_length": 402.51953125, "completions/mean_terminated_length": 402.51953125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.15466666666666667, "grad_norm": 0.005096918903291225, "kl": 0.230438232421875, "learning_rate": 1.527777777777778e-06, "loss": 0.0063, "num_tokens": 32231664.0, "reward": 0.3254355192184448, "reward_std": 0.24309183657169342, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6948660612106323, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.35024493932724, "step": 145 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6668700427089689, "calib/avg_num_step_conf": 2.66015625, "calib/ece": 0.2104838709677419, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.23790322580645162, "calib/gap": 0.1873215375228796, "calib/mean_conf": 0.5492741935483871, "calib/mu_c": 0.6618181818181817, "calib/mu_w": 0.47449664429530214, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.18028225806451614, "calib/std_conf": 0.3333231343826471, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.6047111111111111, "calib/step_q_c_n": 225.0, "calib/step_q_gap": 0.1556343567251462, "calib/step_q_w": 0.4490767543859649, "calib/step_q_w_n": 456.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2363.0, "completions/max_terminated_length": 2363.0, "completions/mean_length": 431.9140625, "completions/mean_terminated_length": 440.5179443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.15573333333333333, "grad_norm": 0.005165283568203449, "kl": 0.2237548828125, "learning_rate": 1.5e-06, "loss": -0.0307, "num_tokens": 32449450.0, "reward": 0.29126691818237305, "reward_std": 0.24586060643196106, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6712952852249146, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.35438641905784607, "step": 146 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6534571723426212, "calib/avg_num_step_conf": 2.234375, "calib/ece": 0.27165282258064516, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.24193548387096775, "calib/gap": 0.19355110423116634, "calib/mean_conf": 0.5237504032258066, "calib/mu_c": 0.6431589473684212, "calib/mu_w": 0.4496078431372548, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.20616935483870968, "calib/std_conf": 0.35710478809063423, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.5963912087912089, "calib/step_q_c_n": 182.0, "calib/step_q_gap": 0.09788855921855927, "calib/step_q_w": 0.4985026495726496, "calib/step_q_w_n": 390.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2664.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 467.9609375, "completions/mean_terminated_length": 475.388916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.1568, "grad_norm": 0.0046214209869503975, "kl": 0.192108154296875, "learning_rate": 1.4722222222222225e-06, "loss": 0.0109, "num_tokens": 32672928.0, "reward": 0.2987235188484192, "reward_std": 0.2539418339729309, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6737816333770752, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.3396158218383789, "step": 147 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6424822881133561, "calib/avg_num_step_conf": 2.50390625, "calib/ece": 0.22454183266932276, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3187250996015936, "calib/gap": 0.18160062975596952, "calib/mean_conf": 0.5973705179282869, "calib/mu_c": 0.6718918918918919, "calib/mu_w": 0.4902912621359224, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.11613545816733076, "calib/std_conf": 0.35981438631180784, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.6342083892617449, "calib/step_q_c_n": 298.0, "calib/step_q_gap": 0.21874065359605005, "calib/step_q_w": 0.4154677356656949, "calib/step_q_w_n": 343.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1738.0, "completions/max_terminated_length": 1738.0, "completions/mean_length": 413.1953125, "completions/mean_terminated_length": 414.8157043457031, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.15786666666666666, "grad_norm": 0.005058152601122856, "kl": 0.229888916015625, "learning_rate": 1.4444444444444445e-06, "loss": 0.0145, "num_tokens": 32883818.0, "reward": 0.3459177315235138, "reward_std": 0.2623218894004822, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6996320486068726, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.31795281171798706, "step": 148 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6855576441102758, "calib/avg_num_step_conf": 2.48828125, "calib/ece": 0.18735177865612646, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.2134387351778656, "calib/gap": 0.23046115288220548, "calib/mean_conf": 0.5181818181818183, "calib/mu_c": 0.6393333333333333, "calib/mu_w": 0.40887218045112783, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.11561264822134389, "calib/std_conf": 0.3482950661553457, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.6032245535714286, "calib/step_q_c_n": 224.0, "calib/step_q_gap": 0.2076306068401937, "calib/step_q_w": 0.3955939467312349, "calib/step_q_w_n": 413.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1472.0, "completions/max_terminated_length": 1472.0, "completions/mean_length": 441.078125, "completions/mean_terminated_length": 444.5511779785156, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.15893333333333334, "grad_norm": 0.004603913053870201, "kl": 0.223388671875, "learning_rate": 1.4166666666666667e-06, "loss": -0.0171, "num_tokens": 33101190.0, "reward": 0.32823610305786133, "reward_std": 0.2496287077665329, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7171752452850342, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.34820303320884705, "step": 149 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.7085946883758174, "calib/avg_num_step_conf": 1.8984375, "calib/ece": 0.25006448979591833, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.3836734693877551, "calib/gap": 0.20303579340718014, "calib/mean_conf": 0.6480579591836734, "calib/mu_c": 0.7533050847457626, "calib/mu_w": 0.5502692913385825, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.20824489795918363, "calib/std_conf": 0.34109737443260196, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.6784527363184079, "calib/step_q_c_n": 201.0, "calib/step_q_gap": 0.1594004556166535, "calib/step_q_w": 0.5190522807017544, "calib/step_q_w_n": 285.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2999.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 392.75390625, "completions/mean_terminated_length": 397.41107177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.16, "grad_norm": 0.005182725843042135, "kl": 0.2346649169921875, "learning_rate": 1.3888888888888892e-06, "loss": -0.011, "num_tokens": 33306695.0, "reward": 0.32051196694374084, "reward_std": 0.2669872045516968, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6684824228286743, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.30870845913887024, "step": 150 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6268835616438355, "calib/avg_num_step_conf": 1.89453125, "calib/ece": 0.2838617886178862, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.3130081300813008, "calib/gap": 0.16171780821917825, "calib/mean_conf": 0.578821138211382, "calib/mu_c": 0.6748000000000002, "calib/mu_w": 0.5130821917808219, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.22808943089430894, "calib/std_conf": 0.3483547750631152, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.6790375586854461, "calib/step_q_c_n": 142.0, "calib/step_q_gap": 0.2337022817175161, "calib/step_q_w": 0.44533527696793, "calib/step_q_w_n": 343.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1756.0, "completions/max_terminated_length": 1756.0, "completions/mean_length": 438.80859375, "completions/mean_terminated_length": 447.5498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.16106666666666666, "grad_norm": 0.005079971626400948, "kl": 0.1945037841796875, "learning_rate": 1.3611111111111112e-06, "loss": -0.0214, "num_tokens": 33526054.0, "reward": 0.27748289704322815, "reward_std": 0.25188490748405457, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.653252363204956, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.36625534296035767, "step": 151 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.6625622484480523, "calib/avg_num_step_conf": 2.30078125, "calib/ece": 0.20938524590163926, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.2827868852459016, "calib/gap": 0.20896036564567838, "calib/mean_conf": 0.5319262295081967, "calib/mu_c": 0.649252336448598, "calib/mu_w": 0.44029197080291965, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.15139344262295074, "calib/std_conf": 0.3562808495249814, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.6407761111111111, "calib/step_q_c_n": 180.0, "calib/step_q_gap": 0.1892076514534094, "calib/step_q_w": 0.4515684596577017, "calib/step_q_w_n": 409.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2099.0, "completions/max_terminated_length": 2099.0, "completions/mean_length": 420.76953125, "completions/mean_terminated_length": 425.7589111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.16213333333333332, "grad_norm": 0.004639812279492617, "kl": 0.215057373046875, "learning_rate": 1.3333333333333334e-06, "loss": -0.0143, "num_tokens": 33739163.0, "reward": 0.29684919118881226, "reward_std": 0.2769307494163513, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6758453249931335, "rewards/format_reward_step": 0.9375, "rewards/step_l1_reward": -0.35324063897132874, "step": 152 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6043184183142561, "calib/avg_num_step_conf": 2.1875, "calib/ece": 0.2664112903225807, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.31048387096774194, "calib/gap": 0.13104838709677424, "calib/mean_conf": 0.5618145161290322, "calib/mu_c": 0.6273387096774194, "calib/mu_w": 0.4962903225806452, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.16411290322580654, "calib/std_conf": 0.3539744584128025, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.6189377151799687, "calib/step_q_c_n": 213.0, "calib/step_q_gap": 0.19640169212521358, "calib/step_q_w": 0.42253602305475507, "calib/step_q_w_n": 347.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2387.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 425.14453125, "completions/mean_terminated_length": 430.185791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1632, "grad_norm": 0.005156941246241331, "kl": 0.226715087890625, "learning_rate": 1.3055555555555556e-06, "loss": -0.0357, "num_tokens": 33955320.0, "reward": 0.28570637106895447, "reward_std": 0.2652091979980469, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6410496234893799, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.35479307174682617, "step": 153 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6710442837203401, "calib/avg_num_step_conf": 1.97265625, "calib/ece": 0.202806324110672, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2608695652173913, "calib/gap": 0.21653977921583556, "calib/mean_conf": 0.5403557312252965, "calib/mu_c": 0.6618918918918919, "calib/mu_w": 0.44535211267605634, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.15221343873517792, "calib/std_conf": 0.3526369774742473, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.6392572463768116, "calib/step_q_c_n": 184.0, "calib/step_q_gap": 0.16298248002167143, "calib/step_q_w": 0.47627476635514016, "calib/step_q_w_n": 321.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1054.0, "completions/max_terminated_length": 1054.0, "completions/mean_length": 391.03515625, "completions/mean_terminated_length": 394.1141662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.16426666666666667, "grad_norm": 0.005356749519705772, "kl": 0.24224853515625, "learning_rate": 1.2777777777777779e-06, "loss": -0.0162, "num_tokens": 34159865.0, "reward": 0.3283141255378723, "reward_std": 0.2631031572818756, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.7153323888778687, "rewards/format_reward_step": 0.98046875, "rewards/step_l1_reward": -0.34229791164398193, "step": 154 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5638262154176739, "calib/avg_num_step_conf": 2.046875, "calib/ece": 0.2869391129032258, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.23387096774193547, "calib/gap": 0.0789156459844213, "calib/mean_conf": 0.5112866935483872, "calib/mu_c": 0.5577450980392158, "calib/mu_w": 0.4788294520547945, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.19346774193548386, "calib/std_conf": 0.34740902862434725, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.5410261780104713, "calib/step_q_c_n": 191.0, "calib/step_q_gap": 0.008083235067528394, "calib/step_q_w": 0.5329429429429429, "calib/step_q_w_n": 333.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2647.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 424.2890625, "completions/mean_terminated_length": 425.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.16533333333333333, "grad_norm": 0.0087783457711339, "kl": 0.311614990234375, "learning_rate": 1.25e-06, "loss": 0.0113, "num_tokens": 34375699.0, "reward": 0.2517399787902832, "reward_std": 0.2683244049549103, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6324470043182373, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.3984982967376709, "step": 155 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6825293350717081, "calib/avg_num_step_conf": 2.5859375, "calib/ece": 0.22645161290322585, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.3225806451612903, "calib/gap": 0.21190873533246407, "calib/mean_conf": 0.5745967741935484, "calib/mu_c": 0.6856779661016948, "calib/mu_w": 0.4737692307692308, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.1626209677419355, "calib/std_conf": 0.3567401674697824, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.6028136882129278, "calib/step_q_c_n": 263.0, "calib/step_q_gap": 0.23851218445352929, "calib/step_q_w": 0.3643015037593985, "calib/step_q_w_n": 399.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 430.71484375, "completions/mean_terminated_length": 434.1062927246094, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.1664, "grad_norm": 0.004716483876109123, "kl": 0.216949462890625, "learning_rate": 1.2222222222222223e-06, "loss": 0.008, "num_tokens": 34590722.0, "reward": 0.3178238570690155, "reward_std": 0.2589898109436035, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6852397918701172, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.3331858515739441, "step": 156 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7456790123456791, "calib/avg_num_step_conf": 2.25390625, "calib/ece": 0.1397453815261045, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.285140562248996, "calib/gap": 0.30451384015594535, "calib/mean_conf": 0.5502136546184738, "calib/mu_c": 0.6896296296296296, "calib/mu_w": 0.38511578947368424, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.07389518072289164, "calib/std_conf": 0.3584324278796773, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.6650764525993883, "calib/step_q_c_n": 327.0, "calib/step_q_gap": 0.1928768525993883, "calib/step_q_w": 0.4721996, "calib/step_q_w_n": 250.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1766.0, "completions/max_terminated_length": 1766.0, "completions/mean_length": 415.85546875, "completions/mean_terminated_length": 420.7865905761719, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.16746666666666668, "grad_norm": 0.005070955958217382, "kl": 0.218902587890625, "learning_rate": 1.1944444444444446e-06, "loss": 0.0121, "num_tokens": 34800909.0, "reward": 0.3705405592918396, "reward_std": 0.23960906267166138, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.7493710517883301, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.30672743916511536, "step": 157 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.62321875, "calib/avg_num_step_conf": 2.14453125, "calib/ece": 0.2537154150197629, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.308300395256917, "calib/gap": 0.15088875000000013, "calib/mean_conf": 0.5915810276679843, "calib/mu_c": 0.6679200000000002, "calib/mu_w": 0.51703125, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.17561264822134398, "calib/std_conf": 0.35225511544819077, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.6522580645161291, "calib/step_q_c_n": 248.0, "calib/step_q_gap": 0.18049726717393644, "calib/step_q_w": 0.4717607973421927, "calib/step_q_w_n": 301.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2380.0, "completions/max_terminated_length": 2380.0, "completions/mean_length": 403.03515625, "completions/mean_terminated_length": 404.61572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.16853333333333334, "grad_norm": 0.0048288567923009396, "kl": 0.250335693359375, "learning_rate": 1.1666666666666668e-06, "loss": -0.0188, "num_tokens": 35009326.0, "reward": 0.2849690318107605, "reward_std": 0.29535138607025146, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6730554699897766, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.39530491828918457, "step": 158 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6866359447004607, "calib/avg_num_step_conf": 1.89453125, "calib/ece": 0.19579999999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.272, "calib/gap": 0.2196927803379417, "calib/mean_conf": 0.59492, "calib/mu_c": 0.7056451612903226, "calib/mu_w": 0.4859523809523809, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.14736, "calib/std_conf": 0.337823909159787, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7478567839195981, "calib/step_q_c_n": 199.0, "calib/step_q_gap": 0.2496672734300876, "calib/step_q_w": 0.4981895104895105, "calib/step_q_w_n": 286.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 395.984375, "completions/mean_terminated_length": 400.67987060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.1696, "grad_norm": 0.005460195243358612, "kl": 0.2312164306640625, "learning_rate": 1.138888888888889e-06, "loss": -0.0123, "num_tokens": 35215482.0, "reward": 0.34846895933151245, "reward_std": 0.2540527582168579, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7034000158309937, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.29474329948425293, "step": 159 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.660592998955068, "calib/avg_num_step_conf": 2.390625, "calib/ece": 0.21722177419354838, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.29435483870967744, "calib/gap": 0.2003335945663533, "calib/mean_conf": 0.5518104838709676, "calib/mu_c": 0.6584396551724139, "calib/mu_w": 0.45810606060606057, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.1506451612903226, "calib/std_conf": 0.35852603772780317, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.6219877637130801, "calib/step_q_c_n": 237.0, "calib/step_q_gap": 0.17720109704641335, "calib/step_q_w": 0.4447866666666667, "calib/step_q_w_n": 375.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2728.0, "completions/max_terminated_length": 2728.0, "completions/mean_length": 445.890625, "completions/mean_terminated_length": 447.6392517089844, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.17066666666666666, "grad_norm": 0.004912951961159706, "kl": 0.2237091064453125, "learning_rate": 1.111111111111111e-06, "loss": -0.0148, "num_tokens": 35434470.0, "reward": 0.31076303124427795, "reward_std": 0.24745705723762512, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6821966171264648, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.34270179271698, "step": 160 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6679886109416311, "calib/avg_num_step_conf": 2.17578125, "calib/ece": 0.20129032258064516, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.29435483870967744, "calib/gap": 0.21405735204392912, "calib/mean_conf": 0.559516129032258, "calib/mu_c": 0.6449664429530201, "calib/mu_w": 0.43090909090909096, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.08000000000000002, "calib/std_conf": 0.3587587356218878, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.6135987500000001, "calib/step_q_c_n": 320.0, "calib/step_q_gap": 0.15349832805907182, "calib/step_q_w": 0.46010042194092826, "calib/step_q_w_n": 237.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1717.0, "completions/max_terminated_length": 1717.0, "completions/mean_length": 387.46484375, "completions/mean_terminated_length": 390.5157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.17173333333333332, "grad_norm": 0.005282443482428789, "kl": 0.236663818359375, "learning_rate": 1.0833333333333335e-06, "loss": 0.0105, "num_tokens": 35637581.0, "reward": 0.3536580801010132, "reward_std": 0.21946199238300323, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7094613313674927, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.31152018904685974, "step": 161 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6402298850574712, "calib/avg_num_step_conf": 2.17578125, "calib/ece": 0.22977600000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.304, "calib/gap": 0.16419573070607563, "calib/mean_conf": 0.590624, "calib/mu_c": 0.6595862068965518, "calib/mu_w": 0.49539047619047616, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.12020000000000003, "calib/std_conf": 0.35114941922776977, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.6168284280936455, "calib/step_q_c_n": 299.0, "calib/step_q_gap": 0.09871718778356797, "calib/step_q_w": 0.5181112403100775, "calib/step_q_w_n": 258.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 411.40234375, "completions/mean_terminated_length": 413.0157165527344, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.1728, "grad_norm": 0.005542648956179619, "kl": 0.2258453369140625, "learning_rate": 1.0555555555555557e-06, "loss": 0.0398, "num_tokens": 35847044.0, "reward": 0.3267264664173126, "reward_std": 0.22407203912734985, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.687700629234314, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.34049761295318604, "step": 162 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6858924141532838, "calib/avg_num_step_conf": 2.76171875, "calib/ece": 0.1811646586345381, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.23293172690763053, "calib/gap": 0.2193458676067372, "calib/mean_conf": 0.5221285140562248, "calib/mu_c": 0.6436936936936937, "calib/mu_w": 0.4243478260869565, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.12875502008032125, "calib/std_conf": 0.34453491081351667, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.6264636363636363, "calib/step_q_c_n": 220.0, "calib/step_q_gap": 0.21332195258540226, "calib/step_q_w": 0.41314168377823407, "calib/step_q_w_n": 487.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 437.140625, "completions/mean_terminated_length": 442.3241271972656, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.17386666666666667, "grad_norm": 0.00472784461453557, "kl": 0.214599609375, "learning_rate": 1.0277777777777777e-06, "loss": 0.0082, "num_tokens": 36063784.0, "reward": 0.3299524486064911, "reward_std": 0.2258731722831726, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7127512097358704, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.33331501483917236, "step": 163 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.7229080475110102, "calib/avg_num_step_conf": 2.49609375, "calib/ece": 0.16824489795918357, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.27755102040816326, "calib/gap": 0.2610036033631389, "calib/mean_conf": 0.5477551020408162, "calib/mu_c": 0.6830508474576271, "calib/mu_w": 0.42204724409448824, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.11718367346938768, "calib/std_conf": 0.3465201075304993, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.5682908424908425, "calib/step_q_c_n": 273.0, "calib/step_q_gap": 0.12329084249084249, "calib/step_q_w": 0.445, "calib/step_q_w_n": 366.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2211.0, "completions/max_terminated_length": 2211.0, "completions/mean_length": 425.19921875, "completions/mean_terminated_length": 437.152587890625, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.17493333333333333, "grad_norm": 0.005297855939716101, "kl": 0.23809814453125, "learning_rate": 1.0000000000000002e-06, "loss": -0.058, "num_tokens": 36278771.0, "reward": 0.3335390090942383, "reward_std": 0.2471492439508438, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7153980731964111, "rewards/format_reward_step": 0.9453125, "rewards/step_l1_reward": -0.3303512632846832, "step": 164 }, { "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6616358024691358, "calib/avg_num_step_conf": 2.28125, "calib/ece": 0.1992156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.24705882352941178, "calib/gap": 0.19099999999999978, "calib/mean_conf": 0.5538823529411765, "calib/mu_c": 0.6549999999999999, "calib/mu_w": 0.46400000000000013, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14125490196078427, "calib/std_conf": 0.33801645435403216, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.6717857142857143, "calib/step_q_c_n": 252.0, "calib/step_q_gap": 0.2090146299483649, "calib/step_q_w": 0.4627710843373494, "calib/step_q_w_n": 332.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 430.40625, "completions/mean_terminated_length": 432.0941467285156, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.176, "grad_norm": 0.005576414056122303, "kl": 0.22052001953125, "learning_rate": 9.722222222222224e-07, "loss": -0.0166, "num_tokens": 36494531.0, "reward": 0.3354248106479645, "reward_std": 0.2429950088262558, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7219324111938477, "rewards/format_reward_step": 0.9921875, "rewards/step_l1_reward": -0.34327030181884766, "step": 165 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.7292634372926343, "calib/avg_num_step_conf": 1.875, "calib/ece": 0.1714170040485829, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.2874493927125506, "calib/gap": 0.289745189117452, "calib/mean_conf": 0.5408906882591091, "calib/mu_c": 0.6699270072992701, "calib/mu_w": 0.3801818181818181, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.078825910931174, "calib/std_conf": 0.35956566268727164, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.686, "calib/step_q_c_n": 240.0, "calib/step_q_gap": 0.21722208333333343, "calib/step_q_w": 0.4687779166666666, "calib/step_q_w_n": 240.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2738.0, "completions/max_terminated_length": 2738.0, "completions/mean_length": 452.44921875, "completions/mean_terminated_length": 459.6309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.17706666666666668, "grad_norm": 0.004611345008015633, "kl": 0.208587646484375, "learning_rate": 9.444444444444445e-07, "loss": 0.0402, "num_tokens": 36716542.0, "reward": 0.36253562569618225, "reward_std": 0.23905181884765625, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.7260754108428955, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.29787909984588623, "step": 166 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6372400756143667, "calib/avg_num_step_conf": 2.02734375, "calib/ece": 0.23885375494071148, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.33201581027667987, "calib/gap": 0.14594202898550745, "calib/mean_conf": 0.6165612648221345, "calib/mu_c": 0.6828985507246378, "calib/mu_w": 0.5369565217391303, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1549802371541502, "calib/std_conf": 0.33285510800716794, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.6601388888888889, "calib/step_q_c_n": 240.0, "calib/step_q_gap": 0.10839193548387116, "calib/step_q_w": 0.5517469534050178, "calib/step_q_w_n": 279.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1262.0, "completions/max_terminated_length": 1262.0, "completions/mean_length": 396.23828125, "completions/mean_terminated_length": 397.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 32.0, "epoch": 0.17813333333333334, "grad_norm": 0.005330167710781097, "kl": 0.23419189453125, "learning_rate": 9.166666666666666e-07, "loss": 0.0112, "num_tokens": 36923587.0, "reward": 0.3239728808403015, "reward_std": 0.24499499797821045, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6964414119720459, "rewards/format_reward_step": 0.984375, "rewards/step_l1_reward": -0.35318315029144287, "step": 167 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6354657247514389, "calib/avg_num_step_conf": 2.953125, "calib/ece": 0.20932270916334655, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2749003984063745, "calib/gap": 0.16906658817373094, "calib/mean_conf": 0.5298804780876494, "calib/mu_c": 0.5999319727891156, "calib/mu_w": 0.43086538461538465, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0767729083665338, "calib/std_conf": 0.33977316322449624, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.5348456790123456, "calib/step_q_c_n": 324.0, "calib/step_q_gap": 0.1928317901234567, "calib/step_q_w": 0.3420138888888889, "calib/step_q_w_n": 432.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2954.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 485.12109375, "completions/mean_terminated_length": 487.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.1792, "grad_norm": 0.004749679937958717, "kl": 0.204132080078125, "learning_rate": 8.88888888888889e-07, "loss": 0.0393, "num_tokens": 37152450.0, "reward": 0.34512436389923096, "reward_std": 0.25722041726112366, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.7028480172157288, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.3227555453777313, "step": 168 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7194544057377049, "calib/avg_num_step_conf": 2.0859375, "calib/ece": 0.202956, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.356, "calib/gap": 0.2582685706967212, "calib/mean_conf": 0.596004, "calib/mu_c": 0.7220390624999999, "calib/mu_w": 0.46377049180327873, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.14348, "calib/std_conf": 0.3596031645911921, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.6753700440528633, "calib/step_q_c_n": 227.0, "calib/step_q_gap": 0.22620717760335185, "calib/step_q_w": 0.4491628664495114, "calib/step_q_w_n": 307.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1589.0, "completions/max_terminated_length": 1589.0, "completions/mean_length": 412.7890625, "completions/mean_terminated_length": 414.4078674316406, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.18026666666666666, "grad_norm": 0.004710485693067312, "kl": 0.2314453125, "learning_rate": 8.611111111111112e-07, "loss": -0.0115, "num_tokens": 37362308.0, "reward": 0.3566637635231018, "reward_std": 0.24371802806854248, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7182464599609375, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.29788774251937866, "step": 169 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.7219016393442623, "calib/avg_num_step_conf": 2.09375, "calib/ece": 0.17457449392712548, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.25101214574898784, "calib/gap": 0.2550772327868852, "calib/mean_conf": 0.5658303643724696, "calib/mu_c": 0.6949180327868852, "calib/mu_w": 0.43984080000000003, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.12323886639676111, "calib/std_conf": 0.34377573486158725, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.6957510729613733, "calib/step_q_c_n": 233.0, "calib/step_q_gap": 0.21651014886896414, "calib/step_q_w": 0.47924092409240915, "calib/step_q_w_n": 303.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2796.0, "completions/max_terminated_length": 2796.0, "completions/mean_length": 454.77734375, "completions/mean_terminated_length": 458.3582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.18133333333333335, "grad_norm": 0.004924747161567211, "kl": 0.2138824462890625, "learning_rate": 8.333333333333333e-07, "loss": 0.0742, "num_tokens": 37582883.0, "reward": 0.3429233431816101, "reward_std": 0.2477169930934906, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.717066764831543, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.31637632846832275, "step": 170 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6727064657638755, "calib/avg_num_step_conf": 2.40234375, "calib/ece": 0.2091177165354331, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2283464566929134, "calib/gap": 0.21798596859304487, "calib/mean_conf": 0.4995437007874015, "calib/mu_c": 0.6257009345794394, "calib/mu_w": 0.40771496598639456, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14370078740157483, "calib/std_conf": 0.34959338543154, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.626068376068376, "calib/step_q_c_n": 234.0, "calib/step_q_gap": 0.16297126320748356, "calib/step_q_w": 0.4630971128608924, "calib/step_q_w_n": 381.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2297.0, "completions/max_terminated_length": 2297.0, "completions/mean_length": 449.28515625, "completions/mean_terminated_length": 449.28515625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.1824, "grad_norm": 0.0047374023124575615, "kl": 0.2057952880859375, "learning_rate": 8.055555555555557e-07, "loss": -0.004, "num_tokens": 37804796.0, "reward": 0.33203035593032837, "reward_std": 0.2442462295293808, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.7250390648841858, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.34222835302352905, "step": 171 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6917808219178081, "calib/avg_num_step_conf": 2.28125, "calib/ece": 0.17857142857142852, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.3611111111111111, "calib/gap": 0.2352390798655984, "calib/mean_conf": 0.6316666666666667, "calib/mu_c": 0.7306164383561644, "calib/mu_w": 0.495377358490566, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.11543650793650789, "calib/std_conf": 0.34227587963188083, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.6494339416058394, "calib/step_q_c_n": 274.0, "calib/step_q_gap": 0.1469052319284201, "calib/step_q_w": 0.5025287096774194, "calib/step_q_w_n": 310.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2550.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 399.25, "completions/mean_terminated_length": 400.8157043457031, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.18346666666666667, "grad_norm": 0.0047450498677790165, "kl": 0.21240234375, "learning_rate": 7.777777777777779e-07, "loss": 0.0381, "num_tokens": 38010356.0, "reward": 0.37122833728790283, "reward_std": 0.24598166346549988, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7320832014083862, "rewards/format_reward_step": 0.9765625, "rewards/step_l1_reward": -0.2990015149116516, "step": 172 }, { "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6394942157654022, "calib/avg_num_step_conf": 2.3984375, "calib/ece": 0.26360655737704924, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.36885245901639346, "calib/gap": 0.15228679042238358, "calib/mean_conf": 0.6523770491803279, "calib/mu_c": 0.7310169491525425, "calib/mu_w": 0.5787301587301589, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.21618852459016402, "calib/std_conf": 0.33808174702278915, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.7052838427947598, "calib/step_q_c_n": 229.0, "calib/step_q_gap": 0.1858682583791753, "calib/step_q_w": 0.5194155844155844, "calib/step_q_w_n": 385.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2599.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 445.59765625, "completions/mean_terminated_length": 449.1062927246094, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.18453333333333333, "grad_norm": 0.00468338280916214, "kl": 0.2205810546875, "learning_rate": 7.5e-07, "loss": 0.0231, "num_tokens": 38227589.0, "reward": 0.2847577929496765, "reward_std": 0.29933086037635803, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.647951602935791, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.3612484633922577, "step": 173 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5515151515151515, "calib/avg_num_step_conf": 2.625, "calib/ece": 0.3007258064516129, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.24193548387096775, "calib/gap": 0.051697635697635635, "calib/mean_conf": 0.53, "calib/mu_c": 0.5598095238095238, "calib/mu_w": 0.5081118881118881, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.20366935483870968, "calib/std_conf": 0.339860028114042, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.5228379182156133, "calib/step_q_c_n": 269.0, "calib/step_q_gap": 0.00943014319493507, "calib/step_q_w": 0.5134077750206782, "calib/step_q_w_n": 403.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2556.0, "completions/max_terminated_length": 2556.0, "completions/mean_length": 503.828125, "completions/mean_terminated_length": 505.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1856, "grad_norm": 0.0047071753069758415, "kl": 0.1872100830078125, "learning_rate": 7.222222222222222e-07, "loss": 0.004, "num_tokens": 38460801.0, "reward": 0.25245046615600586, "reward_std": 0.23629821836948395, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6287406086921692, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.3980584740638733, "step": 174 }, { "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.7098421868395096, "calib/avg_num_step_conf": 3.8125, "calib/ece": 0.18236734693877554, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.23265306122448978, "calib/gap": 0.26061716218120345, "calib/mean_conf": 0.5146938775510204, "calib/mu_c": 0.6753191489361703, "calib/mu_w": 0.41470198675496683, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.15669387755102043, "calib/std_conf": 0.3448957372226458, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.5815013888888889, "calib/step_q_c_n": 240.0, "calib/step_q_gap": 0.21499839975845414, "calib/step_q_w": 0.3665029891304348, "calib/step_q_w_n": 736.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 470.05859375, "completions/mean_terminated_length": 479.4223327636719, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.18666666666666668, "grad_norm": 0.0049662720412015915, "kl": 0.198516845703125, "learning_rate": 6.944444444444446e-07, "loss": -0.0635, "num_tokens": 38686960.0, "reward": 0.3290756940841675, "reward_std": 0.24869877099990845, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.7184125185012817, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.3266673684120178, "step": 175 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.727698771816419, "calib/avg_num_step_conf": 2.5625, "calib/ece": 0.1468674698795181, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.27710843373493976, "calib/gap": 0.27351519069166136, "calib/mean_conf": 0.5694377510040161, "calib/mu_c": 0.7001538461538462, "calib/mu_w": 0.4266386554621849, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.09710843373493978, "calib/std_conf": 0.3488830213566231, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.6834782608695652, "calib/step_q_c_n": 230.0, "calib/step_q_gap": 0.34399469279444783, "calib/step_q_w": 0.33948356807511737, "calib/step_q_w_n": 426.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2815.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 459.83984375, "completions/mean_terminated_length": 463.46063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.18773333333333334, "grad_norm": 0.0047350418753921986, "kl": 0.19427490234375, "learning_rate": 6.666666666666667e-07, "loss": 0.0135, "num_tokens": 38908743.0, "reward": 0.361766517162323, "reward_std": 0.2339170277118683, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.7320382595062256, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.302255243062973, "step": 176 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6748285677319188, "calib/avg_num_step_conf": 2.20703125, "calib/ece": 0.18020080321285142, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.24497991967871485, "calib/gap": 0.2059600207012548, "calib/mean_conf": 0.5447791164658635, "calib/mu_c": 0.6531355932203388, "calib/mu_w": 0.44717557251908396, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.1255421686746988, "calib/std_conf": 0.34156157371721896, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.66644, "calib/step_q_c_n": 230.0, "calib/step_q_gap": 0.22691164179104473, "calib/step_q_w": 0.4395283582089553, "calib/step_q_w_n": 335.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2386.0, "completions/max_terminated_length": 2386.0, "completions/mean_length": 436.40234375, "completions/mean_terminated_length": 438.1137390136719, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.1888, "grad_norm": 0.005169384181499481, "kl": 0.2012939453125, "learning_rate": 6.388888888888889e-07, "loss": -0.0235, "num_tokens": 39124294.0, "reward": 0.3260233402252197, "reward_std": 0.24973660707473755, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7051289081573486, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.33980101346969604, "step": 177 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.663415853963491, "calib/avg_num_step_conf": 2.29296875, "calib/ece": 0.21162055335968386, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.31225296442687744, "calib/gap": 0.19892285571392843, "calib/mean_conf": 0.6147826086956523, "calib/mu_c": 0.7162096774193548, "calib/mu_w": 0.5172868217054264, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.16814229249011864, "calib/std_conf": 0.34112875319856484, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.6415535714285714, "calib/step_q_c_n": 280.0, "calib/step_q_gap": 0.11552197533736619, "calib/step_q_w": 0.5260315960912052, "calib/step_q_w_n": 307.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1895.0, "completions/max_terminated_length": 1895.0, "completions/mean_length": 396.66015625, "completions/mean_terminated_length": 399.7834777832031, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.18986666666666666, "grad_norm": 0.005466687958687544, "kl": 0.215423583984375, "learning_rate": 6.111111111111112e-07, "loss": -0.0249, "num_tokens": 39331911.0, "reward": 0.3221646845340729, "reward_std": 0.25131478905677795, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6939695477485657, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.340265154838562, "step": 178 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6278143182393118, "calib/avg_num_step_conf": 2.921875, "calib/ece": 0.24039682539682533, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.30158730158730157, "calib/gap": 0.15215785479382749, "calib/mean_conf": 0.5884126984126985, "calib/mu_c": 0.6693220338983051, "calib/mu_w": 0.5171641791044777, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.18027777777777773, "calib/std_conf": 0.3388892048621255, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.6327671232876714, "calib/step_q_c_n": 219.0, "calib/step_q_gap": 0.23298886241810618, "calib/step_q_w": 0.3997782608695652, "calib/step_q_w_n": 529.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 396.3046875, "completions/mean_terminated_length": 401.00396728515625, "completions/min_length": 0.0, "completions/min_terminated_length": 114.0, "epoch": 0.19093333333333334, "grad_norm": 0.005298725329339504, "kl": 0.234222412109375, "learning_rate": 5.833333333333334e-07, "loss": -0.0395, "num_tokens": 39539629.0, "reward": 0.29585397243499756, "reward_std": 0.27797776460647583, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6789250373840332, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.37315452098846436, "step": 179 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.7227230850223745, "calib/avg_num_step_conf": 1.98828125, "calib/ece": 0.17453441295546562, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.3076923076923077, "calib/gap": 0.26643327191366145, "calib/mean_conf": 0.5857894736842105, "calib/mu_c": 0.7109160305343512, "calib/mu_w": 0.4444827586206897, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.11497975708502028, "calib/std_conf": 0.3553445101037254, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.6578779735682819, "calib/step_q_c_n": 227.0, "calib/step_q_gap": 0.21603400193707628, "calib/step_q_w": 0.44184397163120565, "calib/step_q_w_n": 282.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1876.0, "completions/max_terminated_length": 1876.0, "completions/mean_length": 462.7734375, "completions/mean_terminated_length": 468.2608947753906, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.192, "grad_norm": 0.004950664006173611, "kl": 0.20599365234375, "learning_rate": 5.555555555555555e-07, "loss": -0.0023, "num_tokens": 39761955.0, "reward": 0.35782623291015625, "reward_std": 0.26714158058166504, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.7177339792251587, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.29505032300949097, "step": 180 }, { "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.7027454780361757, "calib/avg_num_step_conf": 2.36328125, "calib/ece": 0.15698795180722888, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.26506024096385544, "calib/gap": 0.23917829457364342, "calib/mean_conf": 0.554578313253012, "calib/mu_c": 0.66984496124031, "calib/mu_w": 0.4306666666666666, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0967469879518072, "calib/std_conf": 0.34338487968159287, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.6106597014925373, "calib/step_q_c_n": 268.0, "calib/step_q_gap": 0.18756375688323956, "calib/step_q_w": 0.4230959446092978, "calib/step_q_w_n": 337.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 426.63671875, "completions/mean_terminated_length": 429.9960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.19306666666666666, "grad_norm": 0.005251111462712288, "kl": 0.2293853759765625, "learning_rate": 5.277777777777779e-07, "loss": -0.0224, "num_tokens": 39977438.0, "reward": 0.3561040759086609, "reward_std": 0.26347535848617554, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7260953187942505, "rewards/format_reward_step": 0.96875, "rewards/step_l1_reward": -0.3084184229373932, "step": 181 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7578456318914333, "calib/avg_num_step_conf": 1.99609375, "calib/ece": 0.14294354838709683, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.2782258064516129, "calib/gap": 0.2923748939779475, "calib/mean_conf": 0.6222177419354838, "calib/mu_c": 0.7601526717557253, "calib/mu_w": 0.4677777777777778, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.1184677419354839, "calib/std_conf": 0.32491115561036443, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.7211282608695652, "calib/step_q_c_n": 230.0, "calib/step_q_gap": 0.22710833204394237, "calib/step_q_w": 0.4940199288256228, "calib/step_q_w_n": 281.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3027.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 448.94921875, "completions/mean_terminated_length": 450.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.19413333333333332, "grad_norm": 0.005027151666581631, "kl": 0.2074432373046875, "learning_rate": 5.000000000000001e-07, "loss": 0.0303, "num_tokens": 40198529.0, "reward": 0.369170606136322, "reward_std": 0.26670369505882263, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.750970721244812, "rewards/format_reward_step": 0.9609375, "rewards/step_l1_reward": -0.3079419732093811, "step": 182 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7034108231707318, "calib/avg_num_step_conf": 2.23828125, "calib/ece": 0.19454183266932268, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.29880478087649404, "calib/gap": 0.2619651930894309, "calib/mean_conf": 0.5507171314741035, "calib/mu_c": 0.6843089430894309, "calib/mu_w": 0.42234375, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.12760956175298802, "calib/std_conf": 0.3620177568025872, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.6180842911877394, "calib/step_q_c_n": 261.0, "calib/step_q_gap": 0.1559689065723548, "calib/step_q_w": 0.46211538461538465, "calib/step_q_w_n": 312.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1938.0, "completions/max_terminated_length": 1938.0, "completions/mean_length": 454.4609375, "completions/mean_terminated_length": 456.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.1952, "grad_norm": 0.004665024112910032, "kl": 0.200042724609375, "learning_rate": 4.7222222222222226e-07, "loss": 0.0071, "num_tokens": 40421551.0, "reward": 0.3409632444381714, "reward_std": 0.260761559009552, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7225355505943298, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.32967162132263184, "step": 183 }, { "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.7162499999999999, "calib/avg_num_step_conf": 2.32421875, "calib/ece": 0.2200395256916996, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3359683794466403, "calib/gap": 0.24769499999999994, "calib/mean_conf": 0.6069960474308301, "calib/mu_c": 0.729375, "calib/mu_w": 0.48168000000000005, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.16055335968379447, "calib/std_conf": 0.3518978303262607, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.6707046070460705, "calib/step_q_c_n": 246.0, "calib/step_q_gap": 0.1916146356993656, "calib/step_q_w": 0.4790899713467049, "calib/step_q_w_n": 349.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1910.0, "completions/max_terminated_length": 1910.0, "completions/mean_length": 423.39453125, "completions/mean_terminated_length": 425.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.19626666666666667, "grad_norm": 0.0054717957973480225, "kl": 0.2116241455078125, "learning_rate": 4.444444444444445e-07, "loss": -0.0021, "num_tokens": 40635220.0, "reward": 0.33307600021362305, "reward_std": 0.28628167510032654, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7161625027656555, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.3445417284965515, "step": 184 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.7125305457507468, "calib/avg_num_step_conf": 2.2578125, "calib/ece": 0.2018518518518519, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.3292181069958848, "calib/gap": 0.262407683953299, "calib/mean_conf": 0.6115637860082305, "calib/mu_c": 0.7487068965517242, "calib/mu_w": 0.4862992125984252, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.16802469135802472, "calib/std_conf": 0.35169785574241746, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.6936652835408023, "calib/step_q_c_n": 241.0, "calib/step_q_gap": 0.26402196009866574, "calib/step_q_w": 0.4296433234421365, "calib/step_q_w_n": 337.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2799.0, "completions/max_terminated_length": 2799.0, "completions/mean_length": 465.0234375, "completions/mean_terminated_length": 474.286865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.19733333333333333, "grad_norm": 0.004754678346216679, "kl": 0.1771392822265625, "learning_rate": 4.1666666666666667e-07, "loss": -0.0247, "num_tokens": 40861186.0, "reward": 0.3285626769065857, "reward_std": 0.29156944155693054, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6866241693496704, "rewards/format_reward_step": 0.93359375, "rewards/step_l1_reward": -0.3068425953388214, "step": 185 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.681418561607615, "calib/avg_num_step_conf": 2.22265625, "calib/ece": 0.21615176151761523, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.32113821138211385, "calib/gap": 0.2182372642340914, "calib/mean_conf": 0.5773712737127371, "calib/mu_c": 0.6873770491803279, "calib/mu_w": 0.46913978494623654, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.1487940379403795, "calib/std_conf": 0.3578489515041175, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.7136086956521739, "calib/step_q_c_n": 230.0, "calib/step_q_gap": 0.2396263947672181, "calib/step_q_w": 0.4739823008849558, "calib/step_q_w_n": 339.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 421.35546875, "completions/mean_terminated_length": 429.7490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.1984, "grad_norm": 0.005402509588748217, "kl": 0.21337890625, "learning_rate": 3.8888888888888895e-07, "loss": 0.0002, "num_tokens": 41074093.0, "reward": 0.3318336606025696, "reward_std": 0.24953043460845947, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6887925863265991, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.3102814555168152, "step": 186 }, { "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.7013105451597649, "calib/avg_num_step_conf": 3.12109375, "calib/ece": 0.17158469945355187, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.26639344262295084, "calib/gap": 0.2301821702807988, "calib/mean_conf": 0.5464480874316939, "calib/mu_c": 0.6700294985250737, "calib/mu_w": 0.43984732824427486, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.1274590163934426, "calib/std_conf": 0.33904135658692974, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.6229629629629629, "calib/step_q_c_n": 243.0, "calib/step_q_gap": 0.28879947375432985, "calib/step_q_w": 0.3341634892086331, "calib/step_q_w_n": 556.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2506.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 509.32421875, "completions/mean_terminated_length": 517.4087524414062, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.19946666666666665, "grad_norm": 0.0040683439001441, "kl": 0.1923675537109375, "learning_rate": 3.611111111111111e-07, "loss": -0.0137, "num_tokens": 41306024.0, "reward": 0.33687639236450195, "reward_std": 0.23229585587978363, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6975290775299072, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.3003388047218323, "step": 187 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6816679379608441, "calib/avg_num_step_conf": 2.8515625, "calib/ece": 0.2061111111111112, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3134920634920635, "calib/gap": 0.2120747520976355, "calib/mean_conf": 0.5808730158730159, "calib/mu_c": 0.6768115942028986, "calib/mu_w": 0.4647368421052631, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.11968253968253975, "calib/std_conf": 0.3575712157755738, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.6020120401337793, "calib/step_q_c_n": 299.0, "calib/step_q_gap": 0.18791149875713586, "calib/step_q_w": 0.4141005413766434, "calib/step_q_w_n": 431.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2474.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 501.99609375, "completions/mean_terminated_length": 501.99609375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.20053333333333334, "grad_norm": 0.0044453563168644905, "kl": 0.183319091796875, "learning_rate": 3.3333333333333335e-07, "loss": 0.0118, "num_tokens": 41538607.0, "reward": 0.33286768198013306, "reward_std": 0.27241426706314087, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7093316316604614, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.3459400534629822, "step": 188 }, { "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.7370574741149482, "calib/avg_num_step_conf": 1.82421875, "calib/ece": 0.14645669291338584, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2677165354330709, "calib/gap": 0.2864566929133858, "calib/mean_conf": 0.5159842519685041, "calib/mu_c": 0.6592125984251969, "calib/mu_w": 0.37275590551181104, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.08122047244094491, "calib/std_conf": 0.3521458624113264, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.6631730769230769, "calib/step_q_c_n": 208.0, "calib/step_q_gap": 0.24314489159489167, "calib/step_q_w": 0.42002818532818526, "calib/step_q_w_n": 259.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1081.0, "completions/max_terminated_length": 1081.0, "completions/mean_length": 396.578125, "completions/mean_terminated_length": 398.13336181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.2016, "grad_norm": 0.005412383936345577, "kl": 0.22686767578125, "learning_rate": 3.055555555555556e-07, "loss": 0.015, "num_tokens": 41747899.0, "reward": 0.360355019569397, "reward_std": 0.2340199053287506, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.759178876876831, "rewards/format_reward_step": 0.98828125, "rewards/step_l1_reward": -0.3353438377380371, "step": 189 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.7228691197019097, "calib/avg_num_step_conf": 2.4296875, "calib/ece": 0.1691463414634146, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.2682926829268293, "calib/gap": 0.2751380664049505, "calib/mean_conf": 0.5415853658536586, "calib/mu_c": 0.6679699248120301, "calib/mu_w": 0.3928318584070796, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.085040650406504, "calib/std_conf": 0.35661280104217036, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.621969111969112, "calib/step_q_c_n": 259.0, "calib/step_q_gap": 0.20846745907655007, "calib/step_q_w": 0.4135016528925619, "calib/step_q_w_n": 363.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1922.0, "completions/max_terminated_length": 1922.0, "completions/mean_length": 473.79296875, "completions/mean_terminated_length": 481.3135070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.20266666666666666, "grad_norm": 0.004683112259954214, "kl": 0.18890380859375, "learning_rate": 2.7777777777777776e-07, "loss": 0.0779, "num_tokens": 41974798.0, "reward": 0.36603257060050964, "reward_std": 0.24707284569740295, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7150722742080688, "rewards/format_reward_step": 0.94140625, "rewards/step_l1_reward": -0.27519458532333374, "step": 190 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6186642814549792, "calib/avg_num_step_conf": 2.4375, "calib/ece": 0.22776422764227644, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.32113821138211385, "calib/gap": 0.16807195388590723, "calib/mean_conf": 0.5994715447154471, "calib/mu_c": 0.6876068376068375, "calib/mu_w": 0.5195348837209303, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.17581300813008133, "calib/std_conf": 0.34839254155540555, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.6482170542635659, "calib/step_q_c_n": 258.0, "calib/step_q_gap": 0.1743445588173, "calib/step_q_w": 0.4738724954462659, "calib/step_q_w_n": 366.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1973.0, "completions/max_terminated_length": 1973.0, "completions/mean_length": 411.7421875, "completions/mean_terminated_length": 418.2778015136719, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.20373333333333332, "grad_norm": 0.005159609951078892, "kl": 0.217193603515625, "learning_rate": 2.5000000000000004e-07, "loss": -0.0282, "num_tokens": 42184372.0, "reward": 0.29070740938186646, "reward_std": 0.24974632263183594, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6620468497276306, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.3618820309638977, "step": 191 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.7324285339627592, "calib/avg_num_step_conf": 2.3671875, "calib/ece": 0.17230769230769227, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.29554655870445345, "calib/gap": 0.2744623655913981, "calib/mean_conf": 0.5644534412955466, "calib/mu_c": 0.7011290322580648, "calib/mu_w": 0.4266666666666667, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.11736842105263155, "calib/std_conf": 0.35029369525228626, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.6664016736401674, "calib/step_q_c_n": 239.0, "calib/step_q_gap": 0.22390467091537175, "calib/step_q_w": 0.4424970027247957, "calib/step_q_w_n": 367.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2284.0, "completions/max_terminated_length": 2284.0, "completions/mean_length": 467.73828125, "completions/mean_terminated_length": 471.4212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.2048, "grad_norm": 0.005066120997071266, "kl": 0.1961669921875, "learning_rate": 2.2222222222222224e-07, "loss": 0.0182, "num_tokens": 42409089.0, "reward": 0.35959967970848083, "reward_std": 0.2445959597826004, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7285863161087036, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.2976681590080261, "step": 192 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5780069819105046, "calib/avg_num_step_conf": 2.34375, "calib/ece": 0.28694444444444445, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.25396825396825395, "calib/gap": 0.09550682323072035, "calib/mean_conf": 0.5196428571428572, "calib/mu_c": 0.5715652173913043, "calib/mu_w": 0.47605839416058393, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.1751190476190476, "calib/std_conf": 0.3572828892140933, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.614022038567493, "calib/step_q_c_n": 242.0, "calib/step_q_gap": 0.16272706650045393, "calib/step_q_w": 0.4512949720670391, "calib/step_q_w_n": 358.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2361.0, "completions/max_terminated_length": 2361.0, "completions/mean_length": 451.8203125, "completions/mean_terminated_length": 457.1778869628906, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.20586666666666667, "grad_norm": 0.0044546108692884445, "kl": 0.190399169921875, "learning_rate": 1.9444444444444447e-07, "loss": -0.0066, "num_tokens": 42630467.0, "reward": 0.288581907749176, "reward_std": 0.2478843331336975, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6464488506317139, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.35209745168685913, "step": 193 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7408296523910121, "calib/avg_num_step_conf": 2.4921875, "calib/ece": 0.15487160000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.292, "calib/gap": 0.298392017156392, "calib/mean_conf": 0.5328883999999999, "calib/mu_c": 0.6844715447154471, "calib/mu_w": 0.3860795275590551, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09788000000000006, "calib/std_conf": 0.3563361373555032, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.5929262295081966, "calib/step_q_c_n": 244.0, "calib/step_q_gap": 0.18516353915286665, "calib/step_q_w": 0.40776269035532997, "calib/step_q_w_n": 394.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 442.453125, "completions/mean_terminated_length": 444.1882629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.20693333333333333, "grad_norm": 0.005256724078208208, "kl": 0.1939239501953125, "learning_rate": 1.6666666666666668e-07, "loss": 0.0259, "num_tokens": 42849679.0, "reward": 0.38366755843162537, "reward_std": 0.22829414904117584, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.7520594596862793, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.2761306166648865, "step": 194 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6435240762971857, "calib/avg_num_step_conf": 2.9765625, "calib/ece": 0.2221024489795918, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.2653061224489796, "calib/gap": 0.17944061624649865, "calib/mean_conf": 0.5359383673469387, "calib/mu_c": 0.6230952380952381, "calib/mu_w": 0.4436546218487395, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.12187755102040815, "calib/std_conf": 0.35479391131689186, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.6177318295739348, "calib/step_q_c_n": 266.0, "calib/step_q_gap": 0.23394150699328958, "calib/step_q_w": 0.3837903225806452, "calib/step_q_w_n": 496.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 453.1875, "completions/mean_terminated_length": 460.3809814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.208, "grad_norm": 0.0048271650448441505, "kl": 0.201446533203125, "learning_rate": 1.3888888888888888e-07, "loss": 0.0325, "num_tokens": 43071679.0, "reward": 0.3020411729812622, "reward_std": 0.25643521547317505, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6751362681388855, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.3601164221763611, "step": 195 }, { "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6687609402350587, "calib/avg_num_step_conf": 1.9296875, "calib/ece": 0.2553359683794467, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.43478260869565216, "calib/gap": 0.19564766191547878, "calib/mean_conf": 0.6760474308300395, "calib/mu_c": 0.771937984496124, "calib/mu_w": 0.5762903225806453, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.21075098814229257, "calib/std_conf": 0.34119556961169406, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.6745349794238684, "calib/step_q_c_n": 243.0, "calib/step_q_gap": 0.10173298739199588, "calib/step_q_w": 0.5728019920318725, "calib/step_q_w_n": 251.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1446.0, "completions/max_terminated_length": 1446.0, "completions/mean_length": 353.94921875, "completions/mean_terminated_length": 355.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.20906666666666668, "grad_norm": 0.005029777064919472, "kl": 0.22454833984375, "learning_rate": 1.1111111111111112e-07, "loss": -0.0013, "num_tokens": 43264834.0, "reward": 0.31094813346862793, "reward_std": 0.2894595265388489, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6839672327041626, "rewards/format_reward_step": 0.97265625, "rewards/step_l1_reward": -0.3573834300041199, "step": 196 }, { "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.7118863049095607, "calib/avg_num_step_conf": 2.53125, "calib/ece": 0.16815261044176702, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.28112449799196787, "calib/gap": 0.2436937984496123, "calib/mean_conf": 0.5722489959839357, "calib/mu_c": 0.6984999999999999, "calib/mu_w": 0.4548062015503876, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.12923694779116462, "calib/std_conf": 0.3316754386980413, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.664102564102564, "calib/step_q_c_n": 234.0, "calib/step_q_gap": 0.2018803418803417, "calib/step_q_w": 0.46222222222222226, "calib/step_q_w_n": 414.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2145.0, "completions/max_terminated_length": 2145.0, "completions/mean_length": 451.1953125, "completions/mean_terminated_length": 452.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.21013333333333334, "grad_norm": 0.004593154415488243, "kl": 0.219757080078125, "learning_rate": 8.333333333333334e-08, "loss": 0.0146, "num_tokens": 43485396.0, "reward": 0.3464347720146179, "reward_std": 0.23981933295726776, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.7292242050170898, "rewards/format_reward_step": 0.96484375, "rewards/step_l1_reward": -0.3230733871459961, "step": 197 }, { "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.7505573770491805, "calib/avg_num_step_conf": 2.3203125, "calib/ece": 0.16368421052631582, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.3157894736842105, "calib/gap": 0.2978478688524591, "calib/mean_conf": 0.5904048582995953, "calib/mu_c": 0.7375200000000001, "calib/mu_w": 0.43967213114754095, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.12400809716599193, "calib/std_conf": 0.3462310636700311, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7191450131233597, "calib/step_q_c_n": 254.0, "calib/step_q_gap": 0.21238030724100676, "calib/step_q_w": 0.5067647058823529, "calib/step_q_w_n": 340.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1913.0, "completions/max_terminated_length": 1913.0, "completions/mean_length": 403.33984375, "completions/mean_terminated_length": 406.5157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.2112, "grad_norm": 0.005886967293918133, "kl": 0.215576171875, "learning_rate": 5.555555555555556e-08, "loss": 0.0155, "num_tokens": 43694035.0, "reward": 0.365684449672699, "reward_std": 0.22702525556087494, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.7409449219703674, "rewards/format_reward_step": 0.95703125, "rewards/step_l1_reward": -0.2986384928226471, "step": 198 }, { "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.7204724409448819, "calib/avg_num_step_conf": 2.27734375, "calib/ece": 0.20097165991902832, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.3481781376518219, "calib/gap": 0.24710695538057742, "calib/mean_conf": 0.6190283400809716, "calib/mu_c": 0.7460833333333333, "calib/mu_w": 0.4989763779527559, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.167085020242915, "calib/std_conf": 0.3413296624498007, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.6792417061611375, "calib/step_q_c_n": 211.0, "calib/step_q_gap": 0.17360755981967413, "calib/step_q_w": 0.5056341463414634, "calib/step_q_w_n": 369.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1937.0, "completions/max_terminated_length": 1937.0, "completions/mean_length": 442.73046875, "completions/mean_terminated_length": 451.5498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.21226666666666666, "grad_norm": 0.004877576604485512, "kl": 0.2230224609375, "learning_rate": 2.777777777777778e-08, "loss": -0.0493, "num_tokens": 43911574.0, "reward": 0.33585840463638306, "reward_std": 0.28109073638916016, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7057578563690186, "rewards/format_reward_step": 0.953125, "rewards/step_l1_reward": -0.31919723749160767, "step": 199 }, { "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.7489893799246318, "calib/avg_num_step_conf": 2.5078125, "calib/ece": 0.16906557377049175, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.36065573770491804, "calib/gap": 0.30168194587187397, "calib/mean_conf": 0.6129836065573769, "calib/mu_c": 0.7428057553956835, "calib/mu_w": 0.44112380952380953, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.10618852459016385, "calib/std_conf": 0.35251515450755383, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.7124039653035935, "calib/step_q_c_n": 269.0, "calib/step_q_gap": 0.2959428392982317, "calib/step_q_w": 0.41646112600536184, "calib/step_q_w_n": 373.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1725.0, "completions/max_terminated_length": 1725.0, "completions/mean_length": 436.5703125, "completions/mean_terminated_length": 445.2669372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.21333333333333335, "grad_norm": 0.004993748385459185, "kl": 0.1981201171875, "learning_rate": 0.0, "loss": -0.0456, "num_tokens": 44131384.0, "reward": 0.3842260241508484, "reward_std": 0.22596809267997742, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7367645502090454, "rewards/format_reward_step": 0.94921875, "rewards/step_l1_reward": -0.26753127574920654, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.008306001230375842, "train_runtime": 11013.984, "train_samples_per_second": 4.649, "train_steps_per_second": 0.018 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44131384, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }