{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/lambda": 0.1, "aux_distill/loss": 1.4131654458386558, "aux_distill/mean_u": 0.31677682190706, "aux_distill/n_active_tok": 24.571428571428573, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.8921052631578947, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.19807541241162607, "calib/step_q_w": 0.6940298507462687, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.015507475472986698, "learning_rate": 2.5000000000000004e-07, "loss": 0.0932, "num_tokens": 264685.0, "reward": 0.037574999034404755, "reward_std": 0.07449960708618164, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.1092121005058289, "aux_distill/mean_u": 0.2935626227740425, "aux_distill/n_active_tok": 28.63157894736842, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.781, "calib/step_q_c_n": 20.0, "calib/step_q_gap": -0.042553719008264435, "calib/step_q_w": 0.8235537190082645, "calib/step_q_w_n": 121.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.01369641162455082, "learning_rate": 5.000000000000001e-07, "loss": 0.1313, "num_tokens": 533467.0, "reward": 0.07537207007408142, "reward_std": 0.14035090804100037, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.2370983511209488, "aux_distill/mean_u": 0.36669209650917667, "aux_distill/n_active_tok": 19.333333333333332, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.4074074074074074, "calib/avg_num_step_conf": 0.2265625, "calib/ece": 0.6466666666666668, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.9166666666666666, "calib/gap": 0.06666666666666676, "calib/mean_conf": 0.8966666666666666, "calib/mu_c": 0.9466666666666667, "calib/mu_w": 0.8799999999999999, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.6466666666666668, "calib/std_conf": 0.2110028962413128, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.91625, "calib/step_q_c_n": 8.0, "calib/step_q_gap": 0.18605000000000005, "calib/step_q_w": 0.7302, "calib/step_q_w_n": 50.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1171875, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 661.203125, "completions/mean_terminated_length": 748.9734497070312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.010681693442165852, "learning_rate": 7.5e-07, "loss": 0.076, "num_tokens": 807991.0, "reward": 0.0260624997317791, "reward_std": 0.05503210425376892, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.009156249463558197, "rewards/format_reward_step": 0.03125, "step": 3 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.386748492717743, "aux_distill/mean_u": 0.2923102876422463, "aux_distill/n_active_tok": 17.0, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.3854166666666667, "calib/avg_num_step_conf": 0.265625, "calib/ece": 0.6525, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.043333333333333335, "calib/mean_conf": 0.9025000000000001, "calib/mu_c": 0.935, "calib/mu_w": 0.8916666666666667, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.6525, "calib/std_conf": 0.16532165617365438, "calib/step_conf_rate": 0.0625, "calib/step_q_c": 0.6809375, "calib/step_q_c_n": 16.0, "calib/step_q_gap": 0.021129807692307767, "calib/step_q_w": 0.6598076923076922, "calib/step_q_w_n": 52.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 736.09765625, "completions/mean_terminated_length": 805.303466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.004266666666666667, "grad_norm": 0.01607530750334263, "learning_rate": 1.0000000000000002e-06, "loss": 0.0794, "num_tokens": 1102600.0, "reward": 0.03820156306028366, "reward_std": 0.08263486623764038, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.017809374257922173, "rewards/format_reward_step": 0.04296875, "step": 4 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.2863253951072693, "aux_distill/mean_u": 0.4327708304064644, "aux_distill/n_active_tok": 22.0, "calib/answer_extract_rate": 0.0390625, "calib/avg_num_step_conf": 0.1484375, "calib/ece": 0.9733333333333333, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 1.0, "calib/mean_conf": 0.9733333333333333, "calib/mu_c": NaN, "calib/mu_w": 0.9733333333333333, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.9733333333333333, "calib/std_conf": 0.019720265943665407, "calib/step_conf_rate": 0.03125, "calib/step_q_w": 0.8034210526315789, "calib/step_q_w_n": 38.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 674.91796875, "completions/mean_terminated_length": 754.4934692382812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.006632169242948294, "learning_rate": 1.25e-06, "loss": 0.0243, "num_tokens": 1382067.0, "reward": 0.0123308589681983, "reward_std": 0.02698388881981373, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0012242187513038516, "rewards/format_reward_step": 0.0234375, "step": 5 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.314890056848526, "aux_distill/mean_u": 0.29406630225016933, "aux_distill/n_active_tok": 16.666666666666668, "calib/answer_extract_rate": 0.03515625, "calib/auroc": 0.8333333333333334, "calib/avg_num_step_conf": 0.1953125, "calib/ece": 0.7971428571428572, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.03500000000000003, "calib/mean_conf": 0.9399999999999998, "calib/mu_c": 0.97, "calib/mu_w": 0.9349999999999999, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.7971428571428572, "calib/std_conf": 0.0427617987059879, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.884, "calib/step_q_c_n": 5.0, "calib/step_q_gap": 0.18955555555555548, "calib/step_q_w": 0.6944444444444445, "calib/step_q_w_n": 45.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 685.56640625, "completions/mean_terminated_length": 763.065185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.012052838690578938, "learning_rate": 1.5e-06, "loss": 0.0596, "num_tokens": 1663524.0, "reward": 0.01902734488248825, "reward_std": 0.04970274120569229, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.006804687902331352, "rewards/format_reward_step": 0.02734375, "step": 6 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 1.2238177359104156, "aux_distill/mean_u": 0.4388083803543682, "aux_distill/n_active_tok": 35.714285714285715, "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.9230769230769231, "calib/avg_num_step_conf": 0.5234375, "calib/ece": 0.6964428571428573, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.7857142857142857, "calib/gap": 0.2176769230769231, "calib/mean_conf": 0.7678714285714286, "calib/mu_c": 0.97, "calib/mu_w": 0.7523230769230769, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.6964428571428573, "calib/std_conf": 0.3521092168805652, "calib/step_conf_rate": 0.08203125, "calib/step_q_c": 0.882, "calib/step_q_c_n": 5.0, "calib/step_q_gap": 0.059165891472868215, "calib/step_q_w": 0.8228341085271318, "calib/step_q_w_n": 129.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2990.0, "completions/max_terminated_length": 2990.0, "completions/mean_length": 674.9375, "completions/mean_terminated_length": 761.1629638671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.009223215281963348, "learning_rate": 1.75e-06, "loss": 0.0493, "num_tokens": 1943732.0, "reward": 0.026374993845820427, "reward_std": 0.05906704068183899, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.01368748489767313, "rewards/format_reward_step": 0.03515625, "step": 7 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.3814922496676445, "aux_distill/mean_u": 0.30116149651308344, "aux_distill/n_active_tok": 23.75, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.6444444444444444, "calib/avg_num_step_conf": 0.37109375, "calib/ece": 0.7566722222222222, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": 0.05332666666666641, "calib/mean_conf": 0.9088944444444444, "calib/mu_c": 0.9533333333333333, "calib/mu_w": 0.9000066666666668, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.74945, "calib/std_conf": 0.2199744946635158, "calib/step_conf_rate": 0.08984375, "calib/step_q_c": 0.7177777777777777, "calib/step_q_c_n": 9.0, "calib/step_q_gap": -0.13652454780361756, "calib/step_q_w": 0.8543023255813953, "calib/step_q_w_n": 86.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 631.5, "completions/mean_terminated_length": 679.2605590820312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.01828446239233017, "learning_rate": 2.0000000000000003e-06, "loss": 0.0922, "num_tokens": 2211908.0, "reward": 0.03814628720283508, "reward_std": 0.09642170369625092, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.017698820680379868, "rewards/format_reward_step": 0.046875, "step": 8 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.3213333524763584, "aux_distill/mean_u": 0.27625369501018854, "aux_distill/n_active_tok": 20.75, "calib/answer_extract_rate": 0.09375, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.32421875, "calib/ece": 0.9483333333333334, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": -0.9170588235294118, "calib/mean_conf": 0.8961111111111113, "calib/mu_c": 0.03, "calib/mu_w": 0.9470588235294118, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.8944444444444445, "calib/std_conf": 0.21587390581049057, "calib/step_conf_rate": 0.0703125, "calib/step_q_w": 0.7922891566265059, "calib/step_q_w_n": 83.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 716.25, "completions/mean_terminated_length": 783.5897827148438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0096, "grad_norm": 0.012646260671317577, "learning_rate": 2.25e-06, "loss": 0.0973, "num_tokens": 2502804.0, "reward": 0.03833867236971855, "reward_std": 0.08556064963340759, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.006364844273775816, "rewards/format_reward_step": 0.0625, "step": 9 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 1.3350291166986739, "aux_distill/mean_u": 0.2897335086370673, "aux_distill/n_active_tok": 20.857142857142858, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.9833333333333333, "calib/avg_num_step_conf": 0.30078125, "calib/ece": 0.766470588235294, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.7647058823529411, "calib/gap": 0.1256666666666666, "calib/mean_conf": 0.8841176470588235, "calib/mu_c": 0.995, "calib/mu_w": 0.8693333333333334, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.766470588235294, "calib/std_conf": 0.15751970272764243, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.3822222222222222, "calib/step_q_c_n": 9.0, "calib/step_q_gap": -0.31917777777777784, "calib/step_q_w": 0.7014, "calib/step_q_w_n": 68.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2752.0, "completions/max_terminated_length": 2752.0, "completions/mean_length": 672.25390625, "completions/mean_terminated_length": 705.3155517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.010666666666666666, "grad_norm": 0.013075040653347969, "learning_rate": 2.5e-06, "loss": 0.0647, "num_tokens": 2781701.0, "reward": 0.03389336168766022, "reward_std": 0.0721963569521904, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.013099219650030136, "rewards/format_reward_step": 0.046875, "step": 10 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.1985960386016152, "aux_distill/mean_u": 0.28704307273318935, "aux_distill/n_active_tok": 27.09090909090909, "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.5052631578947369, "calib/avg_num_step_conf": 0.58203125, "calib/ece": 0.6941666666666666, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.7916666666666666, "calib/gap": 0.054947368421052634, "calib/mean_conf": 0.9025, "calib/mu_c": 0.946, "calib/mu_w": 0.8910526315789473, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.171875, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.6941666666666666, "calib/std_conf": 0.18166475534162738, "calib/step_conf_rate": 0.12109375, "calib/step_q_c": 0.83, "calib/step_q_c_n": 16.0, "calib/step_q_gap": 0.16220827067669163, "calib/step_q_w": 0.6677917293233083, "calib/step_q_w_n": 133.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 653.48828125, "completions/mean_terminated_length": 721.0905151367188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.01548672839999199, "learning_rate": 2.7500000000000004e-06, "loss": 0.1246, "num_tokens": 3053474.0, "reward": 0.060481246560811996, "reward_std": 0.1259399652481079, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.02721250057220459, "rewards/format_reward_step": 0.07421875, "step": 11 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.195373563390029, "aux_distill/mean_u": 0.29857195077858234, "aux_distill/n_active_tok": 25.68421052631579, "calib/answer_extract_rate": 0.140625, "calib/auroc": 0.5746753246753247, "calib/avg_num_step_conf": 0.4765625, "calib/ece": 0.5375999999999999, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.08203125, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": -0.05090909090909079, "calib/mean_conf": 0.8976000000000001, "calib/mu_c": 0.8690909090909091, "calib/mu_w": 0.9199999999999999, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.1640625, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.49759999999999993, "calib/std_conf": 0.19996559704109104, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.6972122448979592, "calib/step_q_c_n": 49.0, "calib/step_q_gap": -0.03566446743080787, "calib/step_q_w": 0.732876712328767, "calib/step_q_w_n": 73.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 608.76171875, "completions/mean_terminated_length": 654.8025512695312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.013475374318659306, "learning_rate": 3e-06, "loss": 0.1106, "num_tokens": 3313493.0, "reward": 0.08334805071353912, "reward_std": 0.15470468997955322, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.04169609397649765, "rewards/format_reward_step": 0.08203125, "step": 12 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.172689814120531, "aux_distill/mean_u": 0.23420449887686123, "aux_distill/n_active_tok": 32.25, "calib/answer_extract_rate": 0.125, "calib/auroc": 0.828125, "calib/avg_num_step_conf": 0.5078125, "calib/ece": 0.610505, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.65, "calib/gap": 0.21499374999999998, "calib/mean_conf": 0.810505, "calib/mu_c": 0.9824999999999999, "calib/mu_w": 0.76750625, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.16015625, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.610505, "calib/std_conf": 0.2966255981452039, "calib/step_conf_rate": 0.09765625, "calib/step_q_c": 0.8213636363636364, "calib/step_q_c_n": 22.0, "calib/step_q_gap": 0.17941919191919187, "calib/step_q_w": 0.6419444444444445, "calib/step_q_w_n": 108.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 654.80859375, "completions/mean_terminated_length": 704.3319702148438, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.013866666666666666, "grad_norm": 0.010491629131138325, "learning_rate": 3.2500000000000002e-06, "loss": 0.0957, "num_tokens": 3585716.0, "reward": 0.050288278609514236, "reward_std": 0.11872527003288269, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.030264053493738174, "rewards/format_reward_step": 0.0546875, "step": 13 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.0641672429831133, "aux_distill/mean_u": 0.18141969879736022, "aux_distill/n_active_tok": 26.608695652173914, "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.38461538461538464, "calib/avg_num_step_conf": 0.6171875, "calib/ece": 0.5858064516129031, "calib/final_conf_rate": 0.12109375, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.5161290322580645, "calib/gap": 0.036615384615384605, "calib/mean_conf": 0.7212903225806453, "calib/mu_c": 0.752, "calib/mu_w": 0.7153846153846154, "calib/nonempty_final_conf_rate": 0.12109375, "calib/nonempty_reasoning_rate": 0.1953125, "calib/nonempty_step_conf_rate": 0.140625, "calib/pce": 0.5729032258064515, "calib/std_conf": 0.34307661079427937, "calib/step_conf_rate": 0.140625, "calib/step_q_c": 0.40700000000000003, "calib/step_q_c_n": 10.0, "calib/step_q_gap": -0.2302304054054054, "calib/step_q_w": 0.6372304054054054, "calib/step_q_w_n": 148.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 624.78515625, "completions/mean_terminated_length": 689.4180908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.014551612548530102, "learning_rate": 3.5e-06, "loss": 0.1489, "num_tokens": 3851061.0, "reward": 0.07813125103712082, "reward_std": 0.14755792915821075, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.04298124834895134, "rewards/format_reward_step": 0.09375, "step": 14 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.0421833992004395, "aux_distill/mean_u": 0.31752556933961956, "aux_distill/n_active_tok": 37.833333333333336, "calib/answer_extract_rate": 0.1640625, "calib/auroc": 0.7388888888888889, "calib/avg_num_step_conf": 0.890625, "calib/ece": 0.6410090909090909, "calib/final_conf_rate": 0.12890625, "calib/format_rate": 0.10546875, "calib/frac_conf_gt_0.9": 0.5151515151515151, "calib/gap": 0.2032233333333331, "calib/mean_conf": 0.7319181818181819, "calib/mu_c": 0.9166666666666666, "calib/mu_w": 0.7134433333333335, "calib/nonempty_final_conf_rate": 0.12890625, "calib/nonempty_reasoning_rate": 0.23046875, "calib/nonempty_step_conf_rate": 0.1796875, "calib/pce": 0.6410090909090909, "calib/std_conf": 0.32096754622551416, "calib/step_conf_rate": 0.1796875, "calib/step_q_c": 0.66375, "calib/step_q_c_n": 16.0, "calib/step_q_gap": 0.06805419811320756, "calib/step_q_w": 0.5956958018867924, "calib/step_q_w_n": 212.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 639.1953125, "completions/mean_terminated_length": 699.2906494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.012739659287035465, "learning_rate": 3.7500000000000005e-06, "loss": 0.1424, "num_tokens": 4122575.0, "reward": 0.0783761590719223, "reward_std": 0.13948485255241394, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.0395648255944252, "rewards/format_reward_step": 0.10546875, "step": 15 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.1064559129568248, "aux_distill/mean_u": 0.41832564748230244, "aux_distill/n_active_tok": 35.53846153846154, "calib/answer_extract_rate": 0.16796875, "calib/auroc": 0.2633333333333333, "calib/avg_num_step_conf": 0.90625, "calib/ece": 0.6767142857142856, "calib/final_conf_rate": 0.13671875, "calib/format_rate": 0.125, "calib/frac_conf_gt_0.9": 0.4857142857142857, "calib/gap": -0.16349999999999998, "calib/mean_conf": 0.7361428571428571, "calib/mu_c": 0.596, "calib/mu_w": 0.7595, "calib/nonempty_final_conf_rate": 0.13671875, "calib/nonempty_reasoning_rate": 0.22265625, "calib/nonempty_step_conf_rate": 0.1875, "calib/pce": 0.6349999999999999, "calib/std_conf": 0.32873803229555143, "calib/step_conf_rate": 0.1875, "calib/step_q_c": 0.5223076923076923, "calib/step_q_c_n": 13.0, "calib/step_q_gap": -0.0027028099754127233, "calib/step_q_w": 0.525010502283105, "calib/step_q_w_n": 219.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2960.0, "completions/max_terminated_length": 2960.0, "completions/mean_length": 688.58984375, "completions/mean_terminated_length": 753.3291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.017066666666666667, "grad_norm": 0.011877968907356262, "learning_rate": 4.000000000000001e-06, "loss": 0.1549, "num_tokens": 4407702.0, "reward": 0.09559077024459839, "reward_std": 0.1968509405851364, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.04665029048919678, "rewards/format_reward_step": 0.125, "step": 16 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 1.0695910771687827, "aux_distill/mean_u": 0.37407696141474706, "aux_distill/n_active_tok": 40.53333333333333, "calib/answer_extract_rate": 0.21875, "calib/auroc": 0.5897435897435898, "calib/avg_num_step_conf": 1.1875, "calib/ece": 0.4930021739130434, "calib/final_conf_rate": 0.1796875, "calib/format_rate": 0.15234375, "calib/frac_conf_gt_0.9": 0.30434782608695654, "calib/gap": 0.1684736263736264, "calib/mean_conf": 0.6243065217391305, "calib/mu_c": 0.7671428571428571, "calib/mu_w": 0.5986692307692307, "calib/nonempty_final_conf_rate": 0.1796875, "calib/nonempty_reasoning_rate": 0.28125, "calib/nonempty_step_conf_rate": 0.234375, "calib/pce": 0.4825673913043478, "calib/std_conf": 0.3615274530451221, "calib/step_conf_rate": 0.234375, "calib/step_q_c": 0.43399999999999994, "calib/step_q_c_n": 30.0, "calib/step_q_gap": -0.11178827838827843, "calib/step_q_w": 0.5457882783882784, "calib/step_q_w_n": 273.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 667.96875, "completions/mean_terminated_length": 695.1219482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.018133333333333335, "grad_norm": 0.012100381776690483, "learning_rate": 4.25e-06, "loss": 0.2091, "num_tokens": 4682230.0, "reward": 0.131727933883667, "reward_std": 0.2430960088968277, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.08376838266849518, "rewards/format_reward_step": 0.15234375, "step": 17 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 1.0483861842325755, "aux_distill/mean_u": 0.31695819481401377, "aux_distill/n_active_tok": 40.285714285714285, "calib/answer_extract_rate": 0.20703125, "calib/auroc": 0.13, "calib/avg_num_step_conf": 1.12109375, "calib/ece": 0.62823214, "calib/final_conf_rate": 0.17578125, "calib/format_rate": 0.15234375, "calib/frac_conf_gt_0.9": 0.2222222222222222, "calib/gap": -0.4473961575, "calib/mean_conf": 0.5786854733333333, "calib/mu_c": 0.181, "calib/mu_w": 0.6283961575, "calib/nonempty_final_conf_rate": 0.17578125, "calib/nonempty_reasoning_rate": 0.265625, "calib/nonempty_step_conf_rate": 0.2265625, "calib/pce": 0.547903251111111, "calib/std_conf": 0.3337314558672589, "calib/step_conf_rate": 0.2265625, "calib/step_q_c": 0.5427272727272728, "calib/step_q_c_n": 11.0, "calib/step_q_gap": 0.03277944664031618, "calib/step_q_w": 0.5099478260869567, "calib/step_q_w_n": 276.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2990.0, "completions/max_terminated_length": 2990.0, "completions/mean_length": 634.51171875, "completions/mean_terminated_length": 688.2838745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.011530191637575626, "learning_rate": 4.5e-06, "loss": 0.2147, "num_tokens": 4955385.0, "reward": 0.125144824385643, "reward_std": 0.23584964871406555, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.07841463387012482, "rewards/format_reward_step": 0.15234375, "step": 18 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9958725925534964, "aux_distill/mean_u": 0.4017864188827994, "aux_distill/n_active_tok": 75.375, "calib/answer_extract_rate": 0.41796875, "calib/auroc": 0.7012195121951219, "calib/avg_num_step_conf": 2.375, "calib/ece": 0.4502235294117647, "calib/final_conf_rate": 0.33203125, "calib/format_rate": 0.2734375, "calib/frac_conf_gt_0.9": 0.27058823529411763, "calib/gap": 0.33635365853658544, "calib/mean_conf": 0.4855176470588235, "calib/mu_c": 0.81, "calib/mu_w": 0.4736463414634146, "calib/nonempty_final_conf_rate": 0.33203125, "calib/nonempty_reasoning_rate": 0.6015625, "calib/nonempty_step_conf_rate": 0.49609375, "calib/pce": 0.4502235294117647, "calib/std_conf": 0.38048375776618304, "calib/step_conf_rate": 0.49609375, "calib/step_q_c": 0.7280555555555555, "calib/step_q_c_n": 9.0, "calib/step_q_gap": 0.3991562233351882, "calib/step_q_w": 0.32889933222036727, "calib/step_q_w_n": 599.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 574.859375, "completions/mean_terminated_length": 588.656005859375, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.020266666666666665, "grad_norm": 0.015343696810305119, "learning_rate": 4.75e-06, "loss": 0.2044, "num_tokens": 5207309.0, "reward": 0.22790691256523132, "reward_std": 0.3663272261619568, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.17065757513046265, "rewards/format_reward_step": 0.2734375, "step": 19 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 1.0093897189944983, "aux_distill/mean_u": 0.33957365230949094, "aux_distill/n_active_tok": 107.375, "calib/answer_extract_rate": 0.55078125, "calib/auroc": 0.4638888888888889, "calib/avg_num_step_conf": 3.390625, "calib/ece": 0.3564631012784062, "calib/final_conf_rate": 0.5, "calib/format_rate": 0.40625, "calib/frac_conf_gt_0.9": 0.171875, "calib/gap": -0.056053082996629644, "calib/mean_conf": 0.43178028877840624, "calib/mu_c": 0.3844855, "calib/mu_w": 0.4405385829966296, "calib/nonempty_final_conf_rate": 0.5, "calib/nonempty_reasoning_rate": 0.765625, "calib/nonempty_step_conf_rate": 0.66796875, "calib/pce": 0.3159966950284062, "calib/std_conf": 0.3514771066323164, "calib/step_conf_rate": 0.66796875, "calib/step_q_c": 0.3547614457831325, "calib/step_q_c_n": 83.0, "calib/step_q_gap": 0.05394622469469712, "calib/step_q_w": 0.3008152210884354, "calib/step_q_w_n": 784.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 487.01953125, "completions/mean_terminated_length": 500.7108154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.021333333333333333, "grad_norm": 0.015337585471570492, "learning_rate": 5e-06, "loss": 0.2183, "num_tokens": 5436858.0, "reward": 0.38433361053466797, "reward_std": 0.43774229288101196, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.28038597106933594, "rewards/format_reward_step": 0.40625, "step": 20 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9585423450917006, "aux_distill/mean_u": 0.3275177277533846, "aux_distill/n_active_tok": 124.125, "calib/answer_extract_rate": 0.57421875, "calib/auroc": 0.4688228438228439, "calib/avg_num_step_conf": 3.88671875, "calib/ece": 0.33572305747126435, "calib/final_conf_rate": 0.56640625, "calib/format_rate": 0.44140625, "calib/frac_conf_gt_0.9": 0.09655172413793103, "calib/gap": -0.04217678904428901, "calib/mean_conf": 0.3948056781609196, "calib/mu_c": 0.3564102564102564, "calib/mu_w": 0.39858704545454543, "calib/nonempty_final_conf_rate": 0.56640625, "calib/nonempty_reasoning_rate": 0.7890625, "calib/nonempty_step_conf_rate": 0.70703125, "calib/pce": 0.3204367816091954, "calib/std_conf": 0.3195025419416025, "calib/step_conf_rate": 0.70703125, "calib/step_q_c": 0.19203731343283584, "calib/step_q_c_n": 67.0, "calib/step_q_gap": -0.09968971847449173, "calib/step_q_w": 0.29172703190732757, "calib/step_q_w_n": 928.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 458.6171875, "completions/mean_terminated_length": 479.2081298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.0224, "grad_norm": 0.016379138454794884, "learning_rate": 4.9722222222222224e-06, "loss": 0.2659, "num_tokens": 5657224.0, "reward": 0.4006524085998535, "reward_std": 0.446008563041687, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.30521100759506226, "rewards/format_reward_step": 0.44140625, "step": 21 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9724233224987984, "aux_distill/mean_u": 0.38292050698782076, "aux_distill/n_active_tok": 138.75, "calib/answer_extract_rate": 0.73046875, "calib/auroc": 0.5143292682926829, "calib/avg_num_step_conf": 4.3671875, "calib/ece": 0.28598071095066196, "calib/final_conf_rate": 0.71875, "calib/format_rate": 0.59375, "calib/frac_conf_gt_0.9": 0.08152173913043478, "calib/gap": 0.0046966513104987895, "calib/mean_conf": 0.37781385426672937, "calib/mu_c": 0.382, "calib/mu_w": 0.3773033486895012, "calib/nonempty_final_conf_rate": 0.71875, "calib/nonempty_reasoning_rate": 0.91796875, "calib/nonempty_step_conf_rate": 0.828125, "calib/pce": 0.2775494565217391, "calib/std_conf": 0.31317783992399195, "calib/step_conf_rate": 0.828125, "calib/step_q_c": 0.272159375, "calib/step_q_c_n": 64.0, "calib/step_q_gap": 0.029737965093538432, "calib/step_q_w": 0.24242140990646155, "calib/step_q_w_n": 1054.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2812.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 441.92578125, "completions/mean_terminated_length": 448.94049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 30.0, "epoch": 0.023466666666666667, "grad_norm": 0.015693727880716324, "learning_rate": 4.944444444444445e-06, "loss": 0.2656, "num_tokens": 5872173.0, "reward": 0.549543023109436, "reward_std": 0.46734896302223206, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.4272110164165497, "rewards/format_reward_step": 0.59375, "step": 22 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9368573725223541, "aux_distill/mean_u": 0.32277779323490313, "aux_distill/n_active_tok": 152.625, "calib/answer_extract_rate": 0.76171875, "calib/auroc": 0.38348849945235486, "calib/avg_num_step_conf": 4.77734375, "calib/ece": 0.3372117021276596, "calib/final_conf_rate": 0.734375, "calib/format_rate": 0.65234375, "calib/frac_conf_gt_0.9": 0.0851063829787234, "calib/gap": -0.1021048192771084, "calib/mean_conf": 0.36075638297872337, "calib/mu_c": 0.2706, "calib/mu_w": 0.3727048192771084, "calib/nonempty_final_conf_rate": 0.734375, "calib/nonempty_reasoning_rate": 0.90625, "calib/nonempty_step_conf_rate": 0.8515625, "calib/pce": 0.29047340425531915, "calib/std_conf": 0.3209408723162062, "calib/step_conf_rate": 0.8515625, "calib/step_q_c": 0.335420987654321, "calib/step_q_c_n": 81.0, "calib/step_q_gap": 0.054961840890472125, "calib/step_q_w": 0.28045914676384887, "calib/step_q_w_n": 1142.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2650.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 444.546875, "completions/mean_terminated_length": 446.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.0145284878090024, "learning_rate": 4.9166666666666665e-06, "loss": 0.3628, "num_tokens": 6089913.0, "reward": 0.6050222516059875, "reward_std": 0.4527014493942261, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.4678569436073303, "rewards/format_reward_step": 0.65234375, "step": 23 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9629884772002697, "aux_distill/mean_u": 0.3936236706788197, "aux_distill/n_active_tok": 147.75, "calib/answer_extract_rate": 0.796875, "calib/auroc": 0.5271164021164021, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.2884106318407961, "calib/final_conf_rate": 0.78515625, "calib/format_rate": 0.71484375, "calib/frac_conf_gt_0.9": 0.09950248756218906, "calib/gap": 0.026388207142857167, "calib/mean_conf": 0.3470116268656716, "calib/mu_c": 0.37064285714285716, "calib/mu_w": 0.34425465, "calib/nonempty_final_conf_rate": 0.78515625, "calib/nonempty_reasoning_rate": 0.94921875, "calib/nonempty_step_conf_rate": 0.88671875, "calib/pce": 0.2654723233830846, "calib/std_conf": 0.3144549521779533, "calib/step_conf_rate": 0.88671875, "calib/step_q_c": 0.25758333333333333, "calib/step_q_c_n": 72.0, "calib/step_q_gap": -0.012828107025411095, "calib/step_q_w": 0.2704114403587444, "calib/step_q_w_n": 1115.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2254.0, "completions/max_terminated_length": 2254.0, "completions/mean_length": 396.0625, "completions/mean_terminated_length": 399.18109130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0256, "grad_norm": 0.013611423783004284, "learning_rate": 4.888888888888889e-06, "loss": 0.2628, "num_tokens": 6295817.0, "reward": 0.6710429787635803, "reward_std": 0.4255039691925049, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.5413047075271606, "rewards/format_reward_step": 0.71484375, "step": 24 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9668833911418915, "aux_distill/mean_u": 0.354028013155989, "aux_distill/n_active_tok": 140.875, "calib/answer_extract_rate": 0.84375, "calib/auroc": 0.5017616580310882, "calib/avg_num_step_conf": 4.41015625, "calib/ece": 0.28692567340366976, "calib/final_conf_rate": 0.8515625, "calib/format_rate": 0.76171875, "calib/frac_conf_gt_0.9": 0.08256880733944955, "calib/gap": -0.007618006207253836, "calib/mean_conf": 0.3615403816422018, "calib/mu_c": 0.35479600000000006, "calib/mu_w": 0.3624140062072539, "calib/nonempty_final_conf_rate": 0.8515625, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.890625, "calib/pce": 0.2668935779816514, "calib/std_conf": 0.3098885909725723, "calib/step_conf_rate": 0.890625, "calib/step_q_c": 0.40253232323232324, "calib/step_q_c_n": 99.0, "calib/step_q_gap": 0.11097047857212905, "calib/step_q_w": 0.2915618446601942, "calib/step_q_w_n": 1030.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2781.0, "completions/max_terminated_length": 2781.0, "completions/mean_length": 347.08203125, "completions/mean_terminated_length": 347.08203125, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.02666666666666667, "grad_norm": 0.011474363505840302, "learning_rate": 4.861111111111111e-06, "loss": 0.2234, "num_tokens": 6487894.0, "reward": 0.7150354981422424, "reward_std": 0.39984044432640076, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.5706959962844849, "rewards/format_reward_step": 0.76171875, "step": 25 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9821081515401602, "aux_distill/mean_u": 0.43286420605477743, "aux_distill/n_active_tok": 147.125, "calib/answer_extract_rate": 0.8359375, "calib/auroc": 0.4466071428571429, "calib/avg_num_step_conf": 4.6015625, "calib/ece": 0.313422429906542, "calib/final_conf_rate": 0.8359375, "calib/format_rate": 0.77734375, "calib/frac_conf_gt_0.9": 0.07009345794392523, "calib/gap": -0.04397699999999988, "calib/mean_conf": 0.3434, "calib/mu_c": 0.30230000000000007, "calib/mu_w": 0.34627699999999995, "calib/nonempty_final_conf_rate": 0.8359375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.29570093457943925, "calib/std_conf": 0.30139188371635606, "calib/step_conf_rate": 0.9453125, "calib/step_q_c": 0.38596491228070184, "calib/step_q_c_n": 57.0, "calib/step_q_gap": 0.056750688783824055, "calib/step_q_w": 0.3292142234968778, "calib/step_q_w_n": 1121.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2283.0, "completions/max_terminated_length": 2283.0, "completions/mean_length": 334.86328125, "completions/mean_terminated_length": 341.53387451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.027733333333333332, "grad_norm": 0.012159218080341816, "learning_rate": 4.833333333333333e-06, "loss": 0.1981, "num_tokens": 6678859.0, "reward": 0.7168387174606323, "reward_std": 0.38704153895378113, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.6016461849212646, "rewards/format_reward_step": 0.77734375, "step": 26 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9431140720844269, "aux_distill/mean_u": 0.36024294460778056, "aux_distill/n_active_tok": 142.625, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.44554772581678415, "calib/avg_num_step_conf": 4.4609375, "calib/ece": 0.2685700421940928, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.04219409282700422, "calib/gap": -0.05232706598334408, "calib/mean_conf": 0.3059717299578059, "calib/mu_c": 0.25673571428571423, "calib/mu_w": 0.3090627802690583, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.2577350210970464, "calib/std_conf": 0.26608733901787746, "calib/step_conf_rate": 0.95703125, "calib/step_q_c": 0.23749074074074072, "calib/step_q_c_n": 54.0, "calib/step_q_gap": -0.09427304602396519, "calib/step_q_w": 0.3317637867647059, "calib/step_q_w_n": 1088.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1761.0, "completions/max_terminated_length": 1761.0, "completions/mean_length": 287.59375, "completions/mean_terminated_length": 288.7215881347656, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.0288, "grad_norm": 0.011444371193647385, "learning_rate": 4.805555555555556e-06, "loss": 0.065, "num_tokens": 6857699.0, "reward": 0.821548581123352, "reward_std": 0.2998458445072174, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.7095034122467041, "rewards/format_reward_step": 0.87890625, "step": 27 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9115942157804966, "aux_distill/mean_u": 0.3512940447289571, "aux_distill/n_active_tok": 141.0, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.527336860670194, "calib/avg_num_step_conf": 4.40625, "calib/ece": 0.2601383152334178, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.83203125, "calib/frac_conf_gt_0.9": 0.029535864978902954, "calib/gap": 0.033239774395301624, "calib/mean_conf": 0.3483248131237131, "calib/mu_c": 0.3777777777777778, "calib/mu_w": 0.3445380033824762, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.24726953886210978, "calib/std_conf": 0.25299117933780857, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.32133137254901956, "calib/step_q_c_n": 102.0, "calib/step_q_gap": -0.01909075220731571, "calib/step_q_w": 0.34042212475633526, "calib/step_q_w_n": 1026.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 312.0546875, "completions/mean_terminated_length": 313.2784423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.029866666666666666, "grad_norm": 0.010167010128498077, "learning_rate": 4.777777777777778e-06, "loss": 0.1532, "num_tokens": 7044529.0, "reward": 0.7973880171775818, "reward_std": 0.3464619815349579, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.6572760343551636, "rewards/format_reward_step": 0.83203125, "step": 28 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9127364363521338, "aux_distill/mean_u": 0.3823967146983576, "aux_distill/n_active_tok": 157.375, "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.5223503965392934, "calib/avg_num_step_conf": 4.91796875, "calib/ece": 0.29551680672268904, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.037815126050420166, "calib/gap": 0.03450781062244651, "calib/mean_conf": 0.3666680672268907, "calib/mu_c": 0.39842105263157895, "calib/mu_w": 0.36391324200913244, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.2911764705882353, "calib/std_conf": 0.24589287461620496, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.38605633802816897, "calib/step_q_c_n": 71.0, "calib/step_q_gap": 0.01928975553658646, "calib/step_q_w": 0.3667665824915825, "calib/step_q_w_n": 1188.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1309.0, "completions/max_terminated_length": 1309.0, "completions/mean_length": 291.05859375, "completions/mean_terminated_length": 291.05859375, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.030933333333333334, "grad_norm": 0.009713099338114262, "learning_rate": 4.75e-06, "loss": 0.1081, "num_tokens": 7226168.0, "reward": 0.8235447406768799, "reward_std": 0.3040723204612732, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.6939644813537598, "rewards/format_reward_step": 0.87890625, "step": 29 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9098640028387308, "aux_distill/mean_u": 0.32866491871894127, "aux_distill/n_active_tok": 177.625, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.39525993883792054, "calib/avg_num_step_conf": 5.68359375, "calib/ece": 0.30045165289256204, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.049586776859504134, "calib/gap": -0.10314159021406732, "calib/mean_conf": 0.37582933884297526, "calib/mu_c": 0.28291666666666665, "calib/mu_w": 0.386058256880734, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28855371900826443, "calib/std_conf": 0.27210583928978066, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3050420168067227, "calib/step_q_c_n": 119.0, "calib/step_q_gap": -0.08516090235495405, "calib/step_q_w": 0.39020291916167676, "calib/step_q_w_n": 1336.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1679.0, "completions/max_terminated_length": 1679.0, "completions/mean_length": 312.91796875, "completions/mean_terminated_length": 314.1451110839844, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.032, "grad_norm": 0.011567425914108753, "learning_rate": 4.722222222222222e-06, "loss": 0.2423, "num_tokens": 7413259.0, "reward": 0.837209939956665, "reward_std": 0.2785930931568146, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.6744199395179749, "rewards/format_reward_step": 0.90234375, "step": 30 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9276929814368486, "aux_distill/mean_u": 0.3468866376900609, "aux_distill/n_active_tok": 162.25, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5231801971890078, "calib/avg_num_step_conf": 5.0703125, "calib/ece": 0.28122983870967744, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.03225806451612903, "calib/gap": 0.005270610446821866, "calib/mean_conf": 0.3623185483870968, "calib/mu_c": 0.36714285714285716, "calib/mu_w": 0.3618722466960353, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2794354838709677, "calib/std_conf": 0.23781169558956333, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.38036585365853653, "calib/step_q_c_n": 82.0, "calib/step_q_gap": -0.0040372713414634775, "calib/step_q_w": 0.384403125, "calib/step_q_w_n": 1216.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1924.0, "completions/max_terminated_length": 1924.0, "completions/mean_length": 276.140625, "completions/mean_terminated_length": 276.140625, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.03306666666666667, "grad_norm": 0.010378521867096424, "learning_rate": 4.694444444444445e-06, "loss": 0.1865, "num_tokens": 7589863.0, "reward": 0.8974331617355347, "reward_std": 0.2091233730316162, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.7558038234710693, "rewards/format_reward_step": 0.95703125, "step": 31 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.961167685687542, "aux_distill/mean_u": 0.37351661006632264, "aux_distill/n_active_tok": 174.125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.466897233201581, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.27502757201646094, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.01646090534979424, "calib/gap": -0.012935968379446605, "calib/mean_conf": 0.34920288065843624, "calib/mu_c": 0.3374913043478261, "calib/mu_w": 0.3504272727272727, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26479012345679015, "calib/std_conf": 0.22711269558032068, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.405546218487395, "calib/step_q_c_n": 119.0, "calib/step_q_gap": 0.03627239403598437, "calib/step_q_w": 0.36927382445141066, "calib/step_q_w_n": 1276.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1974.0, "completions/max_terminated_length": 1974.0, "completions/mean_length": 293.21484375, "completions/mean_terminated_length": 294.3647155761719, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.034133333333333335, "grad_norm": 0.010933423414826393, "learning_rate": 4.666666666666667e-06, "loss": 0.1524, "num_tokens": 7771630.0, "reward": 0.885528028011322, "reward_std": 0.22666269540786743, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.743712306022644, "rewards/format_reward_step": 0.9375, "step": 32 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.948054114356637, "aux_distill/mean_u": 0.36492014468038125, "aux_distill/n_active_tok": 166.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.565557939914163, "calib/avg_num_step_conf": 5.33203125, "calib/ece": 0.27930830039525695, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.04072746781115888, "calib/mean_conf": 0.33899209486166015, "calib/mu_c": 0.3765, "calib/mu_w": 0.3357725321888411, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2696245059288538, "calib/std_conf": 0.22427851812523084, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.4223333333333334, "calib/step_q_c_n": 90.0, "calib/step_q_gap": 0.046461960784313805, "calib/step_q_w": 0.3758713725490196, "calib/step_q_w_n": 1275.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1012.0, "completions/max_terminated_length": 1012.0, "completions/mean_length": 261.19140625, "completions/mean_terminated_length": 262.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.0352, "grad_norm": 0.010418594814836979, "learning_rate": 4.638888888888889e-06, "loss": 0.0628, "num_tokens": 7945367.0, "reward": 0.9025432467460632, "reward_std": 0.20960542559623718, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.7738364934921265, "rewards/format_reward_step": 0.953125, "step": 33 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9073459450155497, "aux_distill/mean_u": 0.36159769528386776, "aux_distill/n_active_tok": 198.25, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5216346153846154, "calib/avg_num_step_conf": 6.1953125, "calib/ece": 0.217292, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.012, "calib/gap": -0.005361950549450545, "calib/mean_conf": 0.30761199999999994, "calib/mu_c": 0.30280769230769233, "calib/mu_w": 0.3081696428571429, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.210452, "calib/std_conf": 0.2124196164576144, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.36953672316384184, "calib/step_q_c_n": 177.0, "calib/step_q_gap": 0.0358509176280008, "calib/step_q_w": 0.33368580553584104, "calib/step_q_w_n": 1409.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1988.0, "completions/max_terminated_length": 1988.0, "completions/mean_length": 277.12109375, "completions/mean_terminated_length": 277.12109375, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.03626666666666667, "grad_norm": 0.00895098876208067, "learning_rate": 4.611111111111112e-06, "loss": 0.0133, "num_tokens": 8121422.0, "reward": 0.923614501953125, "reward_std": 0.18150296807289124, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.7847288846969604, "rewards/format_reward_step": 0.9609375, "step": 34 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8795184995979071, "aux_distill/mean_u": 0.37786130563642584, "aux_distill/n_active_tok": 225.625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5231277533039648, "calib/avg_num_step_conf": 7.15234375, "calib/ece": 0.1823187250996016, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.01311233480176216, "calib/mean_conf": 0.27564143426294824, "calib/mu_c": 0.28750000000000003, "calib/mu_w": 0.2743876651982379, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18117131474103584, "calib/std_conf": 0.18911776437217667, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33661194029850744, "calib/step_q_c_n": 134.0, "calib/step_q_gap": 0.03802879356898475, "calib/step_q_w": 0.2985831467295227, "calib/step_q_w_n": 1697.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1376.0, "completions/max_terminated_length": 1376.0, "completions/mean_length": 327.74609375, "completions/mean_terminated_length": 329.0314025878906, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.037333333333333336, "grad_norm": 0.007883084006607533, "learning_rate": 4.583333333333333e-06, "loss": 0.1154, "num_tokens": 8314581.0, "reward": 0.9347312450408936, "reward_std": 0.163455069065094, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.8147751092910767, "rewards/format_reward_step": 0.9609375, "step": 35 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8844308815896511, "aux_distill/mean_u": 0.3635496135825137, "aux_distill/n_active_tok": 237.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5120708748615725, "calib/avg_num_step_conf": 7.7109375, "calib/ece": 0.1540284584980237, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.0014955038759689732, "calib/mean_conf": 0.28589249011857704, "calib/mu_c": 0.2846511627906977, "calib/mu_w": 0.28614666666666666, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13498023715415022, "calib/std_conf": 0.20678860428127496, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33951417004048584, "calib/step_q_c_n": 247.0, "calib/step_q_gap": 0.03424879656046276, "calib/step_q_w": 0.3052653734800231, "calib/step_q_w_n": 1727.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1870.0, "completions/max_terminated_length": 1870.0, "completions/mean_length": 294.81640625, "completions/mean_terminated_length": 297.1377868652344, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.0384, "grad_norm": 0.007582934107631445, "learning_rate": 4.555555555555556e-06, "loss": 0.0668, "num_tokens": 8492766.0, "reward": 0.9707470536231995, "reward_std": 0.15500092506408691, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.7891504168510437, "rewards/format_reward_step": 0.984375, "step": 36 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8487446773797274, "aux_distill/mean_u": 0.3279911799047748, "aux_distill/n_active_tok": 246.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.42894611349546974, "calib/avg_num_step_conf": 7.6953125, "calib/ece": 0.20217011952191236, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": -0.056092846924177375, "calib/mean_conf": 0.25040358565737053, "calib/mu_c": 0.19833333333333336, "calib/mu_w": 0.25442618025751074, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19043027888446215, "calib/std_conf": 0.18634575676102816, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21467289719626167, "calib/step_q_c_n": 107.0, "calib/step_q_gap": -0.06815055959386182, "calib/step_q_w": 0.2828234567901235, "calib/step_q_w_n": 1863.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2084.0, "completions/max_terminated_length": 2084.0, "completions/mean_length": 340.00390625, "completions/mean_terminated_length": 340.00390625, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.039466666666666664, "grad_norm": 0.008167412132024765, "learning_rate": 4.527777777777778e-06, "loss": 0.1161, "num_tokens": 8686903.0, "reward": 0.9393041133880615, "reward_std": 0.1312025487422943, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.835639476776123, "rewards/format_reward_step": 0.97265625, "step": 37 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9289264902472496, "aux_distill/mean_u": 0.41301154567567255, "aux_distill/n_active_tok": 264.0, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4389071757735352, "calib/avg_num_step_conf": 8.25, "calib/ece": 0.15761468253968253, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.04175622119815667, "calib/mean_conf": 0.2345281746031746, "calib/mu_c": 0.1985714285714286, "calib/mu_w": 0.24032764976958526, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.12662698412698414, "calib/std_conf": 0.16473101583857125, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2512442396313364, "calib/step_q_c_n": 217.0, "calib/step_q_gap": -0.031052330289507935, "calib/step_q_w": 0.28229656992084434, "calib/step_q_w_n": 1895.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1760.0, "completions/max_terminated_length": 1760.0, "completions/mean_length": 329.30859375, "completions/mean_terminated_length": 329.30859375, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "epoch": 0.04053333333333333, "grad_norm": 0.007806702516973019, "learning_rate": 4.5e-06, "loss": 0.1166, "num_tokens": 8878094.0, "reward": 0.955902099609375, "reward_std": 0.1512955129146576, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.80633544921875, "rewards/format_reward_step": 0.96875, "step": 38 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8619994595646858, "aux_distill/mean_u": 0.35136166090086507, "aux_distill/n_active_tok": 290.75, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4599693545297835, "calib/avg_num_step_conf": 9.8828125, "calib/ece": 0.14684, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.027571346485347648, "calib/mean_conf": 0.22460000000000002, "calib/mu_c": 0.19956521739130434, "calib/mu_w": 0.227136563876652, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.13971999999999998, "calib/std_conf": 0.15636380655381857, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2652631578947368, "calib/step_q_c_n": 152.0, "calib/step_q_gap": 0.010465849231995061, "calib/step_q_w": 0.25479730866274175, "calib/step_q_w_n": 2378.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1418.0, "completions/max_terminated_length": 1418.0, "completions/mean_length": 360.7890625, "completions/mean_terminated_length": 363.6299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.0416, "grad_norm": 0.007670476567000151, "learning_rate": 4.472222222222223e-06, "loss": 0.0735, "num_tokens": 9076544.0, "reward": 0.9427025318145752, "reward_std": 0.15323060750961304, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.8346238136291504, "rewards/format_reward_step": 0.9609375, "step": 39 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8237579446285963, "aux_distill/mean_u": 0.3452751043865746, "aux_distill/n_active_tok": 307.0, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5413419179939003, "calib/avg_num_step_conf": 9.94140625, "calib/ece": 0.13450592885375492, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.007600813283632657, "calib/mean_conf": 0.2224110671936759, "calib/mu_c": 0.22923076923076924, "calib/mu_w": 0.22162995594713658, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12707509881422924, "calib/std_conf": 0.1768323523174813, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3306122448979592, "calib/step_q_c_n": 245.0, "calib/step_q_gap": 0.08606224489795919, "calib/step_q_w": 0.24455000000000002, "calib/step_q_w_n": 2300.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2042.0, "completions/max_terminated_length": 2042.0, "completions/mean_length": 360.4921875, "completions/mean_terminated_length": 361.9059143066406, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.042666666666666665, "grad_norm": 0.008977770805358887, "learning_rate": 4.444444444444444e-06, "loss": 0.0507, "num_tokens": 9275590.0, "reward": 0.9639776945114136, "reward_std": 0.125410258769989, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8459242582321167, "rewards/format_reward_step": 0.98046875, "step": 40 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8463635481894016, "aux_distill/mean_u": 0.3650670920135531, "aux_distill/n_active_tok": 354.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49706208425720616, "calib/avg_num_step_conf": 11.0703125, "calib/ece": 0.1395983935742972, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.008308203991130775, "calib/mean_conf": 0.2163855421686747, "calib/mu_c": 0.20954545454545456, "calib/mu_w": 0.21785365853658534, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08963855421686748, "calib/std_conf": 0.16604720609034307, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2590909090909091, "calib/step_q_c_n": 407.0, "calib/step_q_gap": 0.012933925160130366, "calib/step_q_w": 0.24615698393077873, "calib/step_q_w_n": 2427.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3049.0, "completions/max_terminated_length": 3049.0, "completions/mean_length": 413.68359375, "completions/mean_terminated_length": 413.68359375, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.04373333333333333, "grad_norm": 0.006722410209476948, "learning_rate": 4.416666666666667e-06, "loss": 0.2131, "num_tokens": 9488741.0, "reward": 0.9724918007850647, "reward_std": 0.14764532446861267, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8004522919654846, "rewards/format_reward_step": 0.97265625, "step": 41 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8599336389452219, "aux_distill/mean_u": 0.3952088352244836, "aux_distill/n_active_tok": 315.375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5308812684365782, "calib/avg_num_step_conf": 10.62109375, "calib/ece": 0.1265, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": -0.00944690265486725, "calib/mean_conf": 0.19854000000000002, "calib/mu_c": 0.19000000000000003, "calib/mu_w": 0.19944690265486728, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11452000000000001, "calib/std_conf": 0.17253048542214214, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24458100558659218, "calib/step_q_c_n": 179.0, "calib/step_q_gap": 0.01724714731887564, "calib/step_q_w": 0.22733385826771654, "calib/step_q_w_n": 2540.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2192.0, "completions/max_terminated_length": 2192.0, "completions/mean_length": 363.9921875, "completions/mean_terminated_length": 368.3083190917969, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.0448, "grad_norm": 0.006656656973063946, "learning_rate": 4.388888888888889e-06, "loss": 0.1028, "num_tokens": 9686291.0, "reward": 0.9527983665466309, "reward_std": 0.14238354563713074, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.8430967926979065, "rewards/format_reward_step": 0.96875, "step": 42 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8323895744979382, "aux_distill/mean_u": 0.3494921272171143, "aux_distill/n_active_tok": 355.0, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.49543650793650784, "calib/avg_num_step_conf": 11.4765625, "calib/ece": 0.14447073170731708, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": -0.008338095238095228, "calib/mean_conf": 0.20398455284552844, "calib/mu_c": 0.19686666666666666, "calib/mu_w": 0.2052047619047619, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10105691056910572, "calib/std_conf": 0.17250708930054556, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2714287822878229, "calib/step_q_c_n": 271.0, "calib/step_q_gap": 0.049459800640653806, "calib/step_q_w": 0.2219689816471691, "calib/step_q_w_n": 2667.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2715.0, "completions/max_terminated_length": 2715.0, "completions/mean_length": 429.9296875, "completions/mean_terminated_length": 431.61572265625, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.04586666666666667, "grad_norm": 0.006840248592197895, "learning_rate": 4.361111111111112e-06, "loss": 0.2256, "num_tokens": 9901577.0, "reward": 0.9465239644050598, "reward_std": 0.16663599014282227, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.7992979288101196, "rewards/format_reward_step": 0.953125, "step": 43 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8238438032567501, "aux_distill/mean_u": 0.3623975311474338, "aux_distill/n_active_tok": 386.5, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5369152046783625, "calib/avg_num_step_conf": 13.3515625, "calib/ece": 0.12897154471544714, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013685672514619857, "calib/mean_conf": 0.1984268292682927, "calib/mu_c": 0.21111111111111108, "calib/mu_w": 0.19742543859649123, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12711382113821137, "calib/std_conf": 0.16426555170002113, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22874999999999998, "calib/step_q_c_n": 152.0, "calib/step_q_gap": 0.005412584200857301, "calib/step_q_w": 0.22333741579914268, "calib/step_q_w_n": 3266.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 419.015625, "completions/mean_terminated_length": 425.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.046933333333333334, "grad_norm": 0.006506130564957857, "learning_rate": 4.333333333333334e-06, "loss": 0.0528, "num_tokens": 10115165.0, "reward": 0.9438990950584412, "reward_std": 0.15580600500106812, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.8565481901168823, "rewards/format_reward_step": 0.9609375, "step": 44 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8242166340351105, "aux_distill/mean_u": 0.3212903089373872, "aux_distill/n_active_tok": 352.125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5975462590506838, "calib/avg_num_step_conf": 12.609375, "calib/ece": 0.10689516129032259, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.04069589702333065, "calib/mean_conf": 0.18745967741935482, "calib/mu_c": 0.22454545454545452, "calib/mu_w": 0.18384955752212387, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10282258064516128, "calib/std_conf": 0.15124083012424303, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24678414096916299, "calib/step_q_c_n": 227.0, "calib/step_q_gap": 0.030857816410682443, "calib/step_q_w": 0.21592632455848054, "calib/step_q_w_n": 3001.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 400.49609375, "completions/mean_terminated_length": 406.85321044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.048, "grad_norm": 0.0066505554132163525, "learning_rate": 4.305555555555556e-06, "loss": 0.0685, "num_tokens": 10322740.0, "reward": 0.9599459171295166, "reward_std": 0.11950437724590302, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.8652043342590332, "rewards/format_reward_step": 0.96875, "step": 45 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8673719074577093, "aux_distill/mean_u": 0.3893094594404342, "aux_distill/n_active_tok": 310.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5761150234741783, "calib/avg_num_step_conf": 9.70703125, "calib/ece": 0.09094861660079052, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03984272300469485, "calib/mean_conf": 0.17695652173913046, "calib/mu_c": 0.2105, "calib/mu_w": 0.17065727699530514, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05490118577075099, "calib/std_conf": 0.16107784828602306, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.32854938271604933, "calib/step_q_c_n": 324.0, "calib/step_q_gap": 0.08875299215612334, "calib/step_q_w": 0.239796390559926, "calib/step_q_w_n": 2161.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 341.765625, "completions/mean_terminated_length": 343.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 31.0, "epoch": 0.04906666666666667, "grad_norm": 0.007414902560412884, "learning_rate": 4.277777777777778e-06, "loss": 0.0844, "num_tokens": 10515000.0, "reward": 0.9855220317840576, "reward_std": 0.11352653801441193, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8343254327774048, "rewards/format_reward_step": 0.98046875, "step": 46 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8376510702073574, "aux_distill/mean_u": 0.3701711814266018, "aux_distill/n_active_tok": 377.625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4256296800544588, "calib/avg_num_step_conf": 12.76171875, "calib/ece": 0.1684920634920635, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.037726344452008204, "calib/mean_conf": 0.1996031746031746, "calib/mu_c": 0.16576923076923075, "calib/mu_w": 0.20349557522123896, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13246031746031744, "calib/std_conf": 0.16099591236874838, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24721991701244814, "calib/step_q_c_n": 241.0, "calib/step_q_gap": 0.027494206503525437, "calib/step_q_w": 0.2197257105089227, "calib/step_q_w_n": 3026.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2882.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 398.42578125, "completions/mean_terminated_length": 403.15020751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.050133333333333335, "grad_norm": 0.006663271691650152, "learning_rate": 4.25e-06, "loss": 0.0534, "num_tokens": 10722973.0, "reward": 0.9649396538734436, "reward_std": 0.10768487304449081, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8478480577468872, "rewards/format_reward_step": 0.98046875, "step": 47 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8315467443317175, "aux_distill/mean_u": 0.3491289536408796, "aux_distill/n_active_tok": 358.375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5252850435949028, "calib/avg_num_step_conf": 13.65625, "calib/ece": 0.12555120967741937, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013862441314553942, "calib/mean_conf": 0.20009395161290322, "calib/mu_c": 0.21199999999999997, "calib/mu_w": 0.19813755868544602, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09225806451612906, "calib/std_conf": 0.1687304911175566, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29284172661870506, "calib/step_q_c_n": 278.0, "calib/step_q_gap": 0.05798778006805247, "calib/step_q_w": 0.2348539465506526, "calib/step_q_w_n": 3218.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2289.0, "completions/max_terminated_length": 2289.0, "completions/mean_length": 368.98046875, "completions/mean_terminated_length": 377.8360290527344, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.0512, "grad_norm": 0.006752731278538704, "learning_rate": 4.222222222222223e-06, "loss": 0.1196, "num_tokens": 10921120.0, "reward": 0.9645509719848633, "reward_std": 0.14472366869449615, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8236333131790161, "rewards/format_reward_step": 0.96875, "step": 48 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8337438236922026, "aux_distill/mean_u": 0.39130343889937536, "aux_distill/n_active_tok": 376.875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.49167733674775926, "calib/avg_num_step_conf": 13.01171875, "calib/ece": 0.11639837398373983, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": -0.011475032010243275, "calib/mean_conf": 0.17872357723577237, "calib/mu_c": 0.1687878787878788, "calib/mu_w": 0.18026291079812207, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08048780487804877, "calib/std_conf": 0.1658453977298707, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27235294117647063, "calib/step_q_c_n": 289.0, "calib/step_q_gap": 0.04456563019685192, "calib/step_q_w": 0.2277873109796187, "calib/step_q_w_n": 3042.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2959.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 387.12890625, "completions/mean_terminated_length": 393.2738342285156, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.05226666666666667, "grad_norm": 0.00637467298656702, "learning_rate": 4.194444444444445e-06, "loss": 0.0965, "num_tokens": 11124761.0, "reward": 0.9503391981124878, "reward_std": 0.16382259130477905, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.8147409558296204, "rewards/format_reward_step": 0.95703125, "step": 49 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8423834536224604, "aux_distill/mean_u": 0.37696418118094266, "aux_distill/n_active_tok": 362.125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.543382167213943, "calib/avg_num_step_conf": 12.73046875, "calib/ece": 0.1179003984063745, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.022673276079818117, "calib/mean_conf": 0.19553386454183264, "calib/mu_c": 0.21486486486486486, "calib/mu_w": 0.19219158878504675, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08301195219123504, "calib/std_conf": 0.1812651253439056, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.298502538071066, "calib/step_q_c_n": 394.0, "calib/step_q_gap": 0.0967322064829334, "calib/step_q_w": 0.20177033158813262, "calib/step_q_w_n": 2865.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1494.0, "completions/max_terminated_length": 1494.0, "completions/mean_length": 367.125, "completions/mean_terminated_length": 374.4382629394531, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.05333333333333334, "grad_norm": 0.006909578572958708, "learning_rate": 4.166666666666667e-06, "loss": 0.0143, "num_tokens": 11324105.0, "reward": 0.9689640402793884, "reward_std": 0.1383761614561081, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.8207405209541321, "rewards/format_reward_step": 0.97265625, "step": 50 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.820130554959178, "aux_distill/mean_u": 0.3369361976651867, "aux_distill/n_active_tok": 380.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5625835459958682, "calib/avg_num_step_conf": 13.02734375, "calib/ece": 0.1058, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": 0.06487908615870702, "calib/mean_conf": 0.18395999999999998, "calib/mu_c": 0.23871794871794877, "calib/mu_w": 0.17383886255924175, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06688000000000001, "calib/std_conf": 0.1822106429383311, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22159268929503917, "calib/step_q_c_n": 383.0, "calib/step_q_gap": -0.01511767655861937, "calib/step_q_w": 0.23671036585365854, "calib/step_q_w_n": 2952.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3036.0, "completions/max_terminated_length": 3036.0, "completions/mean_length": 367.9765625, "completions/mean_terminated_length": 373.8174743652344, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.0544, "grad_norm": 0.006935219746083021, "learning_rate": 4.138888888888889e-06, "loss": 0.1055, "num_tokens": 11527603.0, "reward": 0.9762929677963257, "reward_std": 0.13964669406414032, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8275859355926514, "rewards/format_reward_step": 0.97265625, "step": 51 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8263747384771705, "aux_distill/mean_u": 0.3891820417880071, "aux_distill/n_active_tok": 373.375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5281593406593407, "calib/avg_num_step_conf": 13.015625, "calib/ece": 0.1266, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.008986950549450534, "calib/mean_conf": 0.19963999999999998, "calib/mu_c": 0.2076923076923077, "calib/mu_w": 0.19870535714285717, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11112, "calib/std_conf": 0.18129608489981244, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2317586206896552, "calib/step_q_c_n": 290.0, "calib/step_q_gap": -0.01828118864630801, "calib/step_q_w": 0.2500398093359632, "calib/step_q_w_n": 3042.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2896.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 386.859375, "completions/mean_terminated_length": 393.0000305175781, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.055466666666666664, "grad_norm": 0.006386070977896452, "learning_rate": 4.111111111111111e-06, "loss": 0.0668, "num_tokens": 11734591.0, "reward": 0.9621462821960449, "reward_std": 0.13067379593849182, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8461675643920898, "rewards/format_reward_step": 0.9765625, "step": 52 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8546681217849255, "aux_distill/mean_u": 0.3559310260113764, "aux_distill/n_active_tok": 354.625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.41209183673469385, "calib/avg_num_step_conf": 12.4921875, "calib/ece": 0.1763453815261044, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": -0.03902142857142857, "calib/mean_conf": 0.1827710843373494, "calib/mu_c": 0.15142857142857144, "calib/mu_w": 0.19045, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08116465863453816, "calib/std_conf": 0.17555863734895014, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23109442060085839, "calib/step_q_c_n": 466.0, "calib/step_q_gap": -0.002759898579229453, "calib/step_q_w": 0.23385431918008784, "calib/step_q_w_n": 2732.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1689.0, "completions/max_terminated_length": 1689.0, "completions/mean_length": 376.61328125, "completions/mean_terminated_length": 382.5912780761719, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.05653333333333333, "grad_norm": 0.006896526087075472, "learning_rate": 4.083333333333334e-06, "loss": 0.0903, "num_tokens": 11936828.0, "reward": 0.9704056978225708, "reward_std": 0.14509528875350952, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.7767488360404968, "rewards/format_reward_step": 0.97265625, "step": 53 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8472011089324951, "aux_distill/mean_u": 0.3172731910665095, "aux_distill/n_active_tok": 324.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4326, "calib/avg_num_step_conf": 10.94140625, "calib/ece": 0.19068, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02390000000000006, "calib/mean_conf": 0.17292000000000002, "calib/mu_c": 0.15379999999999996, "calib/mu_w": 0.17770000000000002, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0818, "calib/std_conf": 0.17552171831428726, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26541371158392435, "calib/step_q_c_n": 423.0, "calib/step_q_gap": -0.00824860969446084, "calib/step_q_w": 0.2736623212783852, "calib/step_q_w_n": 2378.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1353.0, "completions/max_terminated_length": 1353.0, "completions/mean_length": 342.5078125, "completions/mean_terminated_length": 347.9444580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.0576, "grad_norm": 0.007074246183037758, "learning_rate": 4.055555555555556e-06, "loss": 0.0724, "num_tokens": 12130742.0, "reward": 0.9789115190505981, "reward_std": 0.12483196705579758, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.7820417881011963, "rewards/format_reward_step": 0.9765625, "step": 54 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8344004545360804, "aux_distill/mean_u": 0.36634527799886907, "aux_distill/n_active_tok": 361.875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4828042328042328, "calib/avg_num_step_conf": 13.19921875, "calib/ece": 0.13919678714859438, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007305037957211852, "calib/mean_conf": 0.17012048192771084, "calib/mu_c": 0.16404761904761905, "calib/mu_w": 0.1713526570048309, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07032128514056223, "calib/std_conf": 0.1639313254384552, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23919137466307278, "calib/step_q_c_n": 371.0, "calib/step_q_gap": 0.00885227891839191, "calib/step_q_w": 0.23033909574468087, "calib/step_q_w_n": 3008.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1977.0, "completions/max_terminated_length": 1977.0, "completions/mean_length": 377.52734375, "completions/mean_terminated_length": 385.0478210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.058666666666666666, "grad_norm": 0.007246608845889568, "learning_rate": 4.027777777777779e-06, "loss": 0.0711, "num_tokens": 12335213.0, "reward": 0.9685904383659363, "reward_std": 0.13275596499443054, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8043683767318726, "rewards/format_reward_step": 0.96875, "step": 55 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8279554862529039, "aux_distill/mean_u": 0.37409573772743887, "aux_distill/n_active_tok": 383.625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4842229635807618, "calib/avg_num_step_conf": 12.7578125, "calib/ece": 0.13058964143426297, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.023431470670002785, "calib/mean_conf": 0.15004780876494023, "calib/mu_c": 0.12969696969696973, "calib/mu_w": 0.1531284403669725, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07458167330677291, "calib/std_conf": 0.15279055562966187, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3069805194805195, "calib/step_q_c_n": 308.0, "calib/step_q_gap": 0.063362196289174, "calib/step_q_w": 0.24361832319134552, "calib/step_q_w_n": 2958.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 382.5546875, "completions/mean_terminated_length": 387.0909118652344, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.05973333333333333, "grad_norm": 0.00718336459249258, "learning_rate": 4.000000000000001e-06, "loss": 0.1454, "num_tokens": 12539987.0, "reward": 0.9713689088821411, "reward_std": 0.11349431425333023, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.8372691869735718, "rewards/format_reward_step": 0.9765625, "step": 56 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8112405212596059, "aux_distill/mean_u": 0.3368667943766801, "aux_distill/n_active_tok": 393.25, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.4852999340804219, "calib/avg_num_step_conf": 15.98828125, "calib/ece": 0.11681818181818181, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.004132231404958678, "calib/gap": -0.029485827290705355, "calib/mean_conf": 0.14524793388429755, "calib/mu_c": 0.12027027027027028, "calib/mu_w": 0.14975609756097563, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.054586776859504124, "calib/std_conf": 0.15714702180656595, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.23159292035398227, "calib/step_q_c_n": 339.0, "calib/step_q_gap": 0.0018957972852555738, "calib/step_q_w": 0.2296971230687267, "calib/step_q_w_n": 3754.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 399.31640625, "completions/mean_terminated_length": 417.244873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.0608, "grad_norm": 0.006855270825326443, "learning_rate": 3.972222222222223e-06, "loss": 0.0401, "num_tokens": 12749004.0, "reward": 0.9371576309204102, "reward_std": 0.19312262535095215, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.7883777618408203, "rewards/format_reward_step": 0.94140625, "step": 57 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8085120283067226, "aux_distill/mean_u": 0.3060995207226738, "aux_distill/n_active_tok": 358.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5294642857142857, "calib/avg_num_step_conf": 12.58203125, "calib/ece": 0.11443999999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015035714285714291, "calib/mean_conf": 0.15012, "calib/mu_c": 0.16275, "calib/mu_w": 0.14771428571428571, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05228, "calib/std_conf": 0.15823143050607866, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2920795107033639, "calib/step_q_c_n": 327.0, "calib/step_q_gap": 0.0573316530668746, "calib/step_q_w": 0.2347478576364893, "calib/step_q_w_n": 2894.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3068.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 385.15234375, "completions/mean_terminated_length": 391.2658996582031, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.06186666666666667, "grad_norm": 0.007380124181509018, "learning_rate": 3.944444444444445e-06, "loss": 0.1052, "num_tokens": 12953923.0, "reward": 0.9748600125312805, "reward_std": 0.12817591428756714, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8208136558532715, "rewards/format_reward_step": 0.97265625, "step": 58 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8197278287261724, "aux_distill/mean_u": 0.29762419152445097, "aux_distill/n_active_tok": 332.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.3801643192488263, "calib/avg_num_step_conf": 11.125, "calib/ece": 0.15956521739130433, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.061835680751173724, "calib/mean_conf": 0.13505928853754942, "calib/mu_c": 0.083, "calib/mu_w": 0.14483568075117373, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06826086956521739, "calib/std_conf": 0.15493622442482427, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23215805471124618, "calib/step_q_c_n": 329.0, "calib/step_q_gap": -0.016218285106141678, "calib/step_q_w": 0.24837633981738785, "calib/step_q_w_n": 2519.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2126.0, "completions/max_terminated_length": 2126.0, "completions/mean_length": 369.046875, "completions/mean_terminated_length": 373.4229431152344, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.06293333333333333, "grad_norm": 0.007437416817992926, "learning_rate": 3.916666666666667e-06, "loss": 0.0987, "num_tokens": 13154647.0, "reward": 0.9803744554519653, "reward_std": 0.08235173672437668, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8162176012992859, "rewards/format_reward_step": 0.98828125, "step": 59 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8610778823494911, "aux_distill/mean_u": 0.31887360298002776, "aux_distill/n_active_tok": 318.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.425182976416373, "calib/avg_num_step_conf": 11.01953125, "calib/ece": 0.11852589641434264, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003103822174030796, "calib/mean_conf": 0.09414342629482073, "calib/mu_c": 0.09441176470588235, "calib/mu_w": 0.09410138248847927, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.038605577689243026, "calib/std_conf": 0.11483733232287055, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2889887640449438, "calib/step_q_c_n": 267.0, "calib/step_q_gap": 0.0642785056267762, "calib/step_q_w": 0.22471025841816758, "calib/step_q_w_n": 2554.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1242.0, "completions/max_terminated_length": 1242.0, "completions/mean_length": 341.07421875, "completions/mean_terminated_length": 347.8685302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.064, "grad_norm": 0.0072919754311442375, "learning_rate": 3.88888888888889e-06, "loss": 0.0222, "num_tokens": 13350818.0, "reward": 0.9821978807449341, "reward_std": 0.08395448327064514, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8511144518852234, "rewards/format_reward_step": 0.98046875, "step": 60 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8732533380389214, "aux_distill/mean_u": 0.3002823455179606, "aux_distill/n_active_tok": 284.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4817826935588809, "calib/avg_num_step_conf": 9.13671875, "calib/ece": 0.17202734375000003, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.014920810484245768, "calib/mean_conf": 0.06937890625000001, "calib/mu_c": 0.05754716981132075, "calib/mu_w": 0.07246798029556652, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017187499999999998, "calib/std_conf": 0.09771815030383921, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20204999999999998, "calib/step_q_c_n": 400.0, "calib/step_q_gap": -0.04228935018050545, "calib/step_q_w": 0.24433935018050543, "calib/step_q_w_n": 1939.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1601.0, "completions/max_terminated_length": 1601.0, "completions/mean_length": 321.3125, "completions/mean_terminated_length": 322.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.06506666666666666, "grad_norm": 0.008903604932129383, "learning_rate": 3.861111111111112e-06, "loss": 0.0847, "num_tokens": 13537138.0, "reward": 1.0047328472137451, "reward_std": 0.031484510749578476, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.8024345636367798, "rewards/format_reward_step": 1.0, "step": 61 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8323895167559385, "aux_distill/mean_u": 0.3086617885430675, "aux_distill/n_active_tok": 315.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4349730458221024, "calib/avg_num_step_conf": 10.07421875, "calib/ece": 0.13976377952755903, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009975292003593893, "calib/mean_conf": 0.05094488188976379, "calib/mu_c": 0.042619047619047626, "calib/mu_w": 0.05259433962264152, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012677165354330711, "calib/std_conf": 0.06526666162348307, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20084745762711867, "calib/step_q_c_n": 354.0, "calib/step_q_gap": -0.021171868215577955, "calib/step_q_w": 0.22201932584269662, "calib/step_q_w_n": 2225.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3002.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 364.2578125, "completions/mean_terminated_length": 365.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.06613333333333334, "grad_norm": 0.007575937081128359, "learning_rate": 3.833333333333334e-06, "loss": 0.0982, "num_tokens": 13737468.0, "reward": 0.9957789182662964, "reward_std": 0.03903381526470184, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8353078365325928, "rewards/format_reward_step": 0.9921875, "step": 62 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8655327968299389, "aux_distill/mean_u": 0.3562405669229432, "aux_distill/n_active_tok": 318.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4805605247465712, "calib/avg_num_step_conf": 9.96484375, "calib/ece": 0.14362204724409453, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00719618366129994, "calib/mean_conf": 0.041732283464566935, "calib/mu_c": 0.03564102564102564, "calib/mu_w": 0.04283720930232558, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01590551181102362, "calib/std_conf": 0.07945279872663047, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22545270270270273, "calib/step_q_c_n": 296.0, "calib/step_q_gap": 0.015377048600707183, "calib/step_q_w": 0.21007565410199555, "calib/step_q_w_n": 2255.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2956.0, "completions/max_terminated_length": 2956.0, "completions/mean_length": 387.265625, "completions/mean_terminated_length": 387.265625, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.0672, "grad_norm": 0.007319794036448002, "learning_rate": 3.8055555555555556e-06, "loss": 0.1671, "num_tokens": 13945248.0, "reward": 0.9936214089393616, "reward_std": 0.039710745215415955, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8427116870880127, "rewards/format_reward_step": 0.9921875, "step": 63 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8316024951636791, "aux_distill/mean_u": 0.2873626919249946, "aux_distill/n_active_tok": 302.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4885526921306949, "calib/avg_num_step_conf": 9.80078125, "calib/ece": 0.13798418972332016, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004450069028992171, "calib/mean_conf": 0.03324110671936759, "calib/mu_c": 0.029512195121951225, "calib/mu_w": 0.033962264150943396, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00458498023715415, "calib/std_conf": 0.048264025690469205, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.16411924119241192, "calib/step_q_c_n": 369.0, "calib/step_q_gap": -0.06580786161132643, "calib/step_q_w": 0.22992710280373835, "calib/step_q_w_n": 2140.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2231.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 334.80078125, "completions/mean_terminated_length": 336.1137390136719, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.06826666666666667, "grad_norm": 0.008024153299629688, "learning_rate": 3.777777777777778e-06, "loss": 0.1567, "num_tokens": 14134733.0, "reward": 0.991310715675354, "reward_std": 0.04444682598114014, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8341840505599976, "rewards/format_reward_step": 0.98828125, "step": 64 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8422551490366459, "aux_distill/mean_u": 0.2814362829432169, "aux_distill/n_active_tok": 255.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.44934605488850776, "calib/avg_num_step_conf": 8.07421875, "calib/ece": 0.14957031250000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00016723842195540128, "calib/mean_conf": 0.023320312500000002, "calib/mu_c": 0.023181818181818182, "calib/mu_w": 0.023349056603773583, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005078125, "calib/std_conf": 0.027191288125102565, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.1940531561461794, "calib/step_q_c_n": 301.0, "calib/step_q_gap": -0.005558961634115039, "calib/step_q_w": 0.19961211778029445, "calib/step_q_w_n": 1766.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 298.44921875, "completions/mean_terminated_length": 299.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.06933333333333333, "grad_norm": 0.008111841976642609, "learning_rate": 3.7500000000000005e-06, "loss": 0.0778, "num_tokens": 14316160.0, "reward": 1.0033427476882935, "reward_std": 0.00751079898327589, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8348105549812317, "rewards/format_reward_step": 1.0, "step": 65 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8705679681152105, "aux_distill/mean_u": 0.3264017254864122, "aux_distill/n_active_tok": 282.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4394767441860465, "calib/avg_num_step_conf": 8.8203125, "calib/ece": 0.14352941176470588, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0030523255813953484, "calib/mean_conf": 0.014823529411764708, "calib/mu_c": 0.01225, "calib/mu_w": 0.015302325581395349, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007450980392156863, "calib/std_conf": 0.019937850379098822, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20936305732484078, "calib/step_q_c_n": 314.0, "calib/step_q_gap": 0.043277666378338714, "calib/step_q_w": 0.16608539094650207, "calib/step_q_w_n": 1944.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2218.0, "completions/max_terminated_length": 2218.0, "completions/mean_length": 342.12109375, "completions/mean_terminated_length": 342.12109375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.0704, "grad_norm": 0.007121607661247253, "learning_rate": 3.7222222222222225e-06, "loss": 0.1123, "num_tokens": 14510095.0, "reward": 0.9977003931999207, "reward_std": 0.014767561107873917, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8430570363998413, "rewards/format_reward_step": 0.99609375, "step": 66 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8964165635406971, "aux_distill/mean_u": 0.30745739046953435, "aux_distill/n_active_tok": 243.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4806829268292683, "calib/avg_num_step_conf": 7.59375, "calib/ece": 0.1858270588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004717560975609755, "calib/mean_conf": 0.015192549019607844, "calib/mu_c": 0.0114, "calib/mu_w": 0.016117560975609756, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0024705882352941176, "calib/std_conf": 0.032740294526154796, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.17552380952380955, "calib/step_q_c_n": 315.0, "calib/step_q_gap": 0.0008153994562834499, "calib/step_q_w": 0.1747084100675261, "calib/step_q_w_n": 1629.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2851.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 320.25, "completions/mean_terminated_length": 320.25, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.07146666666666666, "grad_norm": 0.008333339355885983, "learning_rate": 3.694444444444445e-06, "loss": 0.1222, "num_tokens": 14697087.0, "reward": 0.9898937344551086, "reward_std": 0.038719628006219864, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.7961937189102173, "rewards/format_reward_step": 0.98828125, "step": 67 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8725739866495132, "aux_distill/mean_u": 0.29536300696172413, "aux_distill/n_active_tok": 247.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.511605415860735, "calib/avg_num_step_conf": 7.7734375, "calib/ece": 0.17578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000989514404967931, "calib/mean_conf": 0.00953125, "calib/mu_c": 0.00872340425531915, "calib/mu_w": 0.009712918660287081, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000859375, "calib/std_conf": 0.01784741643592988, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17542372881355936, "calib/step_q_c_n": 236.0, "calib/step_q_gap": 0.02437241752507588, "calib/step_q_w": 0.15105131128848348, "calib/step_q_w_n": 1754.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 791.0, "completions/max_terminated_length": 791.0, "completions/mean_length": 297.390625, "completions/mean_terminated_length": 298.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.07253333333333334, "grad_norm": 0.007754533085972071, "learning_rate": 3.6666666666666666e-06, "loss": 0.1273, "num_tokens": 14877307.0, "reward": 0.9974908828735352, "reward_std": 0.014132047072052956, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8152941465377808, "rewards/format_reward_step": 0.99609375, "step": 68 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8729984704405069, "aux_distill/mean_u": 0.29370034451770377, "aux_distill/n_active_tok": 250.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.646583850931677, "calib/avg_num_step_conf": 8.0625, "calib/ece": 0.17317226395737534, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001344695271032011, "calib/mean_conf": 0.009119402709291341, "calib/mu_c": 0.010222473048809789, "calib/mu_w": 0.008877777777777778, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0013020833333333333, "calib/std_conf": 0.023883040880635267, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2219811320754717, "calib/step_q_c_n": 318.0, "calib/step_q_gap": 0.08574401867340986, "calib/step_q_w": 0.13623711340206185, "calib/step_q_w_n": 1746.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1646.0, "completions/max_terminated_length": 1646.0, "completions/mean_length": 318.09375, "completions/mean_terminated_length": 319.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.0736, "grad_norm": 0.00929983053356409, "learning_rate": 3.638888888888889e-06, "loss": 0.0899, "num_tokens": 15063235.0, "reward": 1.0015100240707397, "reward_std": 0.003858521580696106, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.823332667350769, "rewards/format_reward_step": 1.0, "step": 69 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9363727066665888, "aux_distill/mean_u": 0.3052057791038958, "aux_distill/n_active_tok": 213.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5160601976639713, "calib/avg_num_step_conf": 6.83984375, "calib/ece": 0.1601968503937008, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00018867924528301935, "calib/mean_conf": 0.00515748031496063, "calib/mu_c": 0.005, "calib/mu_w": 0.0051886792452830195, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009504184902402799, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.14195833333333335, "calib/step_q_c_n": 240.0, "calib/step_q_gap": -0.004583228546216622, "calib/step_q_w": 0.14654156187954998, "calib/step_q_w_n": 1511.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1461.0, "completions/max_terminated_length": 1461.0, "completions/mean_length": 272.34375, "completions/mean_terminated_length": 273.4117736816406, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.07466666666666667, "grad_norm": 0.008598068729043007, "learning_rate": 3.6111111111111115e-06, "loss": 0.0989, "num_tokens": 15239947.0, "reward": 0.9929498434066772, "reward_std": 0.023966234177350998, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8296495676040649, "rewards/format_reward_step": 0.9921875, "step": 70 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8899203259497881, "aux_distill/mean_u": 0.3015708150804158, "aux_distill/n_active_tok": 220.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.542351892213196, "calib/avg_num_step_conf": 7.19921875, "calib/ece": 0.18811764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013156330493362387, "calib/mean_conf": 0.004039215686274511, "calib/mu_c": 0.00510204081632653, "calib/mu_w": 0.0037864077669902915, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006842659683318667, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2603875968992248, "calib/step_q_c_n": 258.0, "calib/step_q_gap": 0.12090494705695351, "calib/step_q_w": 0.1394826498422713, "calib/step_q_w_n": 1585.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 953.0, "completions/max_terminated_length": 953.0, "completions/mean_length": 279.7421875, "completions/mean_terminated_length": 280.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.07573333333333333, "grad_norm": 0.00749945780262351, "learning_rate": 3.5833333333333335e-06, "loss": 0.0561, "num_tokens": 15415969.0, "reward": 0.9931343793869019, "reward_std": 0.02386919967830181, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8026750087738037, "rewards/format_reward_step": 0.9921875, "step": 71 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9215385001152754, "aux_distill/mean_u": 0.2936664698069263, "aux_distill/n_active_tok": 237.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4305194805194805, "calib/avg_num_step_conf": 7.515625, "calib/ece": 0.13246666666666668, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015759740259740258, "calib/mean_conf": 0.004788235294117647, "calib/mu_c": 0.0034285714285714284, "calib/mu_w": 0.005004545454545454, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007296135078374823, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18448888888888892, "calib/step_q_c_n": 225.0, "calib/step_q_gap": -0.006087920999280599, "calib/step_q_w": 0.19057680988816952, "calib/step_q_w_n": 1699.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 966.0, "completions/max_terminated_length": 966.0, "completions/mean_length": 289.51953125, "completions/mean_terminated_length": 290.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0768, "grad_norm": 0.006771552842110395, "learning_rate": 3.555555555555556e-06, "loss": 0.0924, "num_tokens": 15594494.0, "reward": 0.9965245127677917, "reward_std": 0.012207376770675182, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.860236644744873, "rewards/format_reward_step": 0.99609375, "step": 72 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8665466215461493, "aux_distill/mean_u": 0.23089176820613588, "aux_distill/n_active_tok": 229.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5551916148051098, "calib/avg_num_step_conf": 7.23828125, "calib/ece": 0.16430078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0022997052079921387, "calib/mean_conf": 0.0036679687500000002, "calib/mu_c": 0.005581395348837209, "calib/mu_w": 0.0032816901408450707, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0071032500659221785, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22144396551724138, "calib/step_q_c_n": 232.0, "calib/step_q_gap": -0.001768804377885097, "calib/step_q_w": 0.22321276989512648, "calib/step_q_w_n": 1621.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 296.3671875, "completions/mean_terminated_length": 297.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.07786666666666667, "grad_norm": 0.007758422289043665, "learning_rate": 3.5277777777777784e-06, "loss": 0.0508, "num_tokens": 15777396.0, "reward": 1.0009055137634277, "reward_std": 0.002186012454330921, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8338423371315002, "rewards/format_reward_step": 1.0, "step": 73 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9035820104181767, "aux_distill/mean_u": 0.2673429256586252, "aux_distill/n_active_tok": 233.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5504227053140097, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.18501176470588235, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016751207729468602, "calib/mean_conf": 0.0032235294117647063, "calib/mu_c": 0.004583333333333333, "calib/mu_w": 0.002908212560386473, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0063103583763024305, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21533112582781458, "calib/step_q_c_n": 302.0, "calib/step_q_gap": 0.024556566633859883, "calib/step_q_w": 0.1907745591939547, "calib/step_q_w_n": 1588.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 993.0, "completions/max_terminated_length": 993.0, "completions/mean_length": 289.82421875, "completions/mean_terminated_length": 290.9608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.07893333333333333, "grad_norm": 0.007173248566687107, "learning_rate": 3.5e-06, "loss": 0.0995, "num_tokens": 15955519.0, "reward": 0.9930218458175659, "reward_std": 0.02364840731024742, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8063561916351318, "rewards/format_reward_step": 0.9921875, "step": 74 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8751150369644165, "aux_distill/mean_u": 0.2630927631062383, "aux_distill/n_active_tok": 224.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5070944972630366, "calib/avg_num_step_conf": 7.0625, "calib/ece": 0.30078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.795447997695264e-05, "calib/mean_conf": 0.00390625, "calib/mu_c": 0.003974358974358975, "calib/mu_w": 0.0038764044943820227, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006463161837483261, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24687037037037038, "calib/step_q_c_n": 540.0, "calib/step_q_gap": 0.029165323051758396, "calib/step_q_w": 0.21770504731861198, "calib/step_q_w_n": 1268.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 964.0, "completions/max_terminated_length": 964.0, "completions/mean_length": 283.9296875, "completions/mean_terminated_length": 285.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.08, "grad_norm": 0.010993538424372673, "learning_rate": 3.4722222222222224e-06, "loss": 0.0414, "num_tokens": 16132957.0, "reward": 1.0011824369430542, "reward_std": 0.0027043407317250967, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.6976773738861084, "rewards/format_reward_step": 1.0, "step": 75 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9287750497460365, "aux_distill/mean_u": 0.2989380853039391, "aux_distill/n_active_tok": 217.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5268634622082897, "calib/avg_num_step_conf": 6.8671875, "calib/ece": 0.222890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006025774991292232, "calib/mean_conf": 0.003671875, "calib/mu_c": 0.004137931034482759, "calib/mu_w": 0.0035353535353535356, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006658065333441465, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22013698630136985, "calib/step_q_c_n": 365.0, "calib/step_q_gap": -0.016203717216218133, "calib/step_q_w": 0.23634070351758799, "calib/step_q_w_n": 1393.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1078.0, "completions/max_terminated_length": 1078.0, "completions/mean_length": 271.57421875, "completions/mean_terminated_length": 272.63922119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.08106666666666666, "grad_norm": 0.008762000128626823, "learning_rate": 3.444444444444445e-06, "loss": 0.0686, "num_tokens": 16305536.0, "reward": 1.000908613204956, "reward_std": 0.002230893587693572, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7752546668052673, "rewards/format_reward_step": 1.0, "step": 76 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8958236277103424, "aux_distill/mean_u": 0.27076952840991, "aux_distill/n_active_tok": 247.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5445985708280789, "calib/avg_num_step_conf": 7.78515625, "calib/ece": 0.2348828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015645229087852043, "calib/mean_conf": 0.0033984375, "calib/mu_c": 0.00459016393442623, "calib/mu_w": 0.003025641025641026, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006478136117633972, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24567200000000003, "calib/step_q_c_n": 375.0, "calib/step_q_gap": 0.000832692212608177, "calib/step_q_w": 0.24483930778739185, "calib/step_q_w_n": 1618.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1003.0, "completions/max_terminated_length": 1003.0, "completions/mean_length": 303.20703125, "completions/mean_terminated_length": 304.3960876464844, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.08213333333333334, "grad_norm": 0.00724209938198328, "learning_rate": 3.416666666666667e-06, "loss": 0.1019, "num_tokens": 16487821.0, "reward": 1.0010669231414795, "reward_std": 0.002557475585490465, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7638527154922485, "rewards/format_reward_step": 1.0, "step": 77 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.886054253205657, "aux_distill/mean_u": 0.2860843198626096, "aux_distill/n_active_tok": 292.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48472781375809554, "calib/avg_num_step_conf": 9.13671875, "calib/ece": 0.22411764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009670925958340628, "calib/mean_conf": 0.0033333333333333335, "calib/mu_c": 0.0025862068965517245, "calib/mu_w": 0.0035532994923857873, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007212915068878163, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2432278481012658, "calib/step_q_c_n": 474.0, "calib/step_q_gap": -0.02152657549122744, "calib/step_q_w": 0.26475442359249324, "calib/step_q_w_n": 1865.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2417.0, "completions/max_terminated_length": 2417.0, "completions/mean_length": 373.08984375, "completions/mean_terminated_length": 373.08984375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.0832, "grad_norm": 0.008203305304050446, "learning_rate": 3.3888888888888893e-06, "loss": 0.1184, "num_tokens": 16691356.0, "reward": 0.9966482520103455, "reward_std": 0.012584471143782139, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7706402540206909, "rewards/format_reward_step": 0.99609375, "step": 78 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8530756682157516, "aux_distill/mean_u": 0.27669342164705013, "aux_distill/n_active_tok": 306.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4914740431981811, "calib/avg_num_step_conf": 9.67578125, "calib/ece": 0.20047058823529412, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004698749526335739, "calib/mean_conf": 0.003450980392156863, "calib/mu_c": 0.003076923076923077, "calib/mu_w": 0.003546798029556651, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006250690081087976, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2234047619047619, "calib/step_q_c_n": 420.0, "calib/step_q_gap": -0.022195627011135033, "calib/step_q_w": 0.24560038891589694, "calib/step_q_w_n": 2057.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1474.0, "completions/max_terminated_length": 1474.0, "completions/mean_length": 372.7421875, "completions/mean_terminated_length": 374.2039489746094, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.08426666666666667, "grad_norm": 0.0066146100871264935, "learning_rate": 3.3611111111111117e-06, "loss": 0.0719, "num_tokens": 16893154.0, "reward": 0.9927873611450195, "reward_std": 0.023470385000109673, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.7902621030807495, "rewards/format_reward_step": 0.9921875, "step": 79 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8846382517367601, "aux_distill/mean_u": 0.3161391397071147, "aux_distill/n_active_tok": 302.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5348426933254925, "calib/avg_num_step_conf": 9.72265625, "calib/ece": 0.29423529411764704, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013290208762128776, "calib/mean_conf": 0.003803921568627451, "calib/mu_c": 0.004736842105263157, "calib/mu_w": 0.0034078212290502797, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006747605553063313, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2645139028475712, "calib/step_q_c_n": 597.0, "calib/step_q_gap": 0.03888441024714839, "calib/step_q_w": 0.2256294926004228, "calib/step_q_w_n": 1892.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1072.0, "completions/max_terminated_length": 1072.0, "completions/mean_length": 355.96875, "completions/mean_terminated_length": 357.3647155761719, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.08533333333333333, "grad_norm": 0.007952683605253696, "learning_rate": 3.3333333333333333e-06, "loss": 0.1073, "num_tokens": 17086442.0, "reward": 0.9974700808525085, "reward_std": 0.013819643296301365, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7019714713096619, "rewards/format_reward_step": 0.99609375, "step": 80 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8901407215744257, "aux_distill/mean_u": 0.285899711043199, "aux_distill/n_active_tok": 327.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5391246684350133, "calib/avg_num_step_conf": 11.09765625, "calib/ece": 0.3053174603174603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004067197170645447, "calib/mean_conf": 0.004206349206349207, "calib/mu_c": 0.004487179487179487, "calib/mu_w": 0.004080459770114942, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00815068159611388, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2041506646971935, "calib/step_q_c_n": 677.0, "calib/step_q_gap": -0.04997927984994144, "calib/step_q_w": 0.25412994454713494, "calib/step_q_w_n": 2164.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2698.0, "completions/max_terminated_length": 2698.0, "completions/mean_length": 390.67578125, "completions/mean_terminated_length": 395.3083190917969, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.0864, "grad_norm": 0.00825242418795824, "learning_rate": 3.3055555555555558e-06, "loss": 0.091, "num_tokens": 17292703.0, "reward": 0.9857007265090942, "reward_std": 0.03914933651685715, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.6823390126228333, "rewards/format_reward_step": 0.984375, "step": 81 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8487306144088507, "aux_distill/mean_u": 0.265551814483768, "aux_distill/n_active_tok": 319.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.472996254681648, "calib/avg_num_step_conf": 10.27734375, "calib/ece": 0.29284584980237155, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011325842696629207, "calib/mean_conf": 0.0035968379446640318, "calib/mu_c": 0.0028, "calib/mu_w": 0.003932584269662921, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006661457030128761, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27258389261744964, "calib/step_q_c_n": 596.0, "calib/step_q_gap": 0.020334261167818224, "calib/step_q_w": 0.2522496314496314, "calib/step_q_w_n": 2035.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2420.0, "completions/max_terminated_length": 2420.0, "completions/mean_length": 392.7109375, "completions/mean_terminated_length": 395.80316162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.08746666666666666, "grad_norm": 0.006242407485842705, "learning_rate": 3.277777777777778e-06, "loss": 0.111, "num_tokens": 17498789.0, "reward": 0.9851671457290649, "reward_std": 0.045587096363306046, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.6929906010627747, "rewards/format_reward_step": 0.984375, "step": 82 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.844496738165617, "aux_distill/mean_u": 0.27586103866241396, "aux_distill/n_active_tok": 350.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5069444444444444, "calib/avg_num_step_conf": 11.12109375, "calib/ece": 0.28003937007874014, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003220390720390723, "calib/mean_conf": 0.003425196850393701, "calib/mu_c": 0.003194444444444444, "calib/mu_w": 0.0035164835164835165, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006122142187278071, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.21304761904761907, "calib/step_q_c_n": 651.0, "calib/step_q_gap": -0.028655104085349964, "calib/step_q_w": 0.24170272313296903, "calib/step_q_w_n": 2196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1536.0, "completions/mean_length": 437.68359375, "completions/mean_terminated_length": 439.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.08853333333333334, "grad_norm": 0.006423268001526594, "learning_rate": 3.2500000000000002e-06, "loss": 0.0654, "num_tokens": 17718100.0, "reward": 0.9891560077667236, "reward_std": 0.035011157393455505, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7087808847427368, "rewards/format_reward_step": 0.98828125, "step": 83 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8821609672158957, "aux_distill/mean_u": 0.29654691763738605, "aux_distill/n_active_tok": 311.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5858699411968177, "calib/avg_num_step_conf": 9.92578125, "calib/ece": 0.22686274509803922, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002512106537530266, "calib/mean_conf": 0.004509803921568627, "calib/mu_c": 0.006440677966101695, "calib/mu_w": 0.003928571428571429, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007171218478847034, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24171568627450982, "calib/step_q_c_n": 408.0, "calib/step_q_gap": 0.006632329500013773, "calib/step_q_w": 0.23508335677449604, "calib/step_q_w_n": 2133.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1109.0, "completions/max_terminated_length": 1109.0, "completions/mean_length": 377.25390625, "completions/mean_terminated_length": 378.7333679199219, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.0896, "grad_norm": 0.007551021408289671, "learning_rate": 3.2222222222222227e-06, "loss": 0.0945, "num_tokens": 17920597.0, "reward": 0.9975423812866211, "reward_std": 0.013799784705042839, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7685222625732422, "rewards/format_reward_step": 0.99609375, "step": 84 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8072050474584103, "aux_distill/mean_u": 0.27407098827109644, "aux_distill/n_active_tok": 381.0, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4070313968791126, "calib/avg_num_step_conf": 12.80078125, "calib/ece": 0.21127487250996016, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002328480729460425, "calib/mean_conf": 0.003864569721115538, "calib/mu_c": 0.002037037037037037, "calib/mu_w": 0.004365517766497462, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0061037968614311745, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24305845511482252, "calib/step_q_c_n": 479.0, "calib/step_q_gap": 0.02982529535785325, "calib/step_q_w": 0.21323315975696927, "calib/step_q_w_n": 2798.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1740.0, "completions/max_terminated_length": 1740.0, "completions/mean_length": 446.6015625, "completions/mean_terminated_length": 455.4980163574219, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.09066666666666667, "grad_norm": 0.005789844784885645, "learning_rate": 3.1944444444444443e-06, "loss": 0.0193, "num_tokens": 18142751.0, "reward": 0.9769666194915771, "reward_std": 0.05974484235048294, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7664331793785095, "rewards/format_reward_step": 0.9765625, "step": 85 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8167195729911327, "aux_distill/mean_u": 0.27448563426857864, "aux_distill/n_active_tok": 428.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5223398001175779, "calib/avg_num_step_conf": 13.859375, "calib/ece": 0.24583333333333332, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010052910052910057, "calib/mean_conf": 0.0041666666666666675, "calib/mu_c": 0.004920634920634922, "calib/mu_w": 0.003915343915343916, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006147976825559694, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.24684210526315786, "calib/step_q_c_n": 608.0, "calib/step_q_gap": 0.03063003043322593, "calib/step_q_w": 0.21621207482993193, "calib/step_q_w_n": 2940.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 503.32421875, "completions/mean_terminated_length": 507.28741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.09173333333333333, "grad_norm": 0.0065255239605903625, "learning_rate": 3.1666666666666667e-06, "loss": 0.1452, "num_tokens": 18377114.0, "reward": 0.9777464270591736, "reward_std": 0.06083172559738159, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7328367233276367, "rewards/format_reward_step": 0.9765625, "step": 86 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7748824749141932, "aux_distill/mean_u": 0.25857830306465757, "aux_distill/n_active_tok": 381.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4614916286149163, "calib/avg_num_step_conf": 12.015625, "calib/ece": 0.2851383399209486, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0029421613394216128, "calib/mean_conf": 0.005928853754940711, "calib/mu_c": 0.0038356164383561648, "calib/mu_w": 0.0067777777777777775, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0012648221343873518, "calib/std_conf": 0.021050167759847967, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.1767248322147651, "calib/step_q_c_n": 745.0, "calib/step_q_gap": -0.07141240502247212, "calib/step_q_w": 0.2481372372372372, "calib/step_q_w_n": 2331.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 451.01171875, "completions/mean_terminated_length": 454.56298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.0928, "grad_norm": 0.0069386824034154415, "learning_rate": 3.138888888888889e-06, "loss": 0.0785, "num_tokens": 18598069.0, "reward": 0.9891386032104492, "reward_std": 0.03575124591588974, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.704839825630188, "rewards/format_reward_step": 0.98828125, "step": 87 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7960807308554649, "aux_distill/mean_u": 0.26279148734187663, "aux_distill/n_active_tok": 447.125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4537837837837837, "calib/avg_num_step_conf": 16.36328125, "calib/ece": 0.23917551020408162, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001574774774774774, "calib/mean_conf": 0.005722448979591837, "calib/mu_c": 0.004533333333333334, "calib/mu_w": 0.006108108108108108, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008126832987742656, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17887025139664806, "calib/step_q_c_n": 716.0, "calib/step_q_gap": -0.027347686985154412, "calib/step_q_w": 0.20621793838180247, "calib/step_q_w_n": 3473.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2677.0, "completions/max_terminated_length": 2677.0, "completions/mean_length": 515.9921875, "completions/mean_terminated_length": 534.7935180664062, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.09386666666666667, "grad_norm": 0.005010412540286779, "learning_rate": 3.1111111111111116e-06, "loss": 0.0542, "num_tokens": 18840011.0, "reward": 0.9580464959144592, "reward_std": 0.11547387391328812, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.7246866822242737, "rewards/format_reward_step": 0.95703125, "step": 88 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8258982449769974, "aux_distill/mean_u": 0.32058147032185685, "aux_distill/n_active_tok": 455.375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5010034904013961, "calib/avg_num_step_conf": 15.20703125, "calib/ece": 0.23350996015936257, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006125654450261789, "calib/mean_conf": 0.0055338645418326685, "calib/mu_c": 0.006, "calib/mu_w": 0.005387434554973821, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007922548925671525, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2176074498567335, "calib/step_q_c_n": 698.0, "calib/step_q_gap": 0.012401215520893905, "calib/step_q_w": 0.2052062343358396, "calib/step_q_w_n": 3192.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 543.171875, "completions/mean_terminated_length": 551.793701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.09493333333333333, "grad_norm": 0.005261671729385853, "learning_rate": 3.0833333333333336e-06, "loss": 0.087, "num_tokens": 19087951.0, "reward": 0.9740170836448669, "reward_std": 0.07242131233215332, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.7410029172897339, "rewards/format_reward_step": 0.97265625, "step": 89 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8276430629193783, "aux_distill/mean_u": 0.29844895285457557, "aux_distill/n_active_tok": 487.0, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5064856711915535, "calib/avg_num_step_conf": 16.74609375, "calib/ece": 0.20158536585365852, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009321266968325788, "calib/mean_conf": 0.0057317073170731715, "calib/mu_c": 0.006470588235294118, "calib/mu_w": 0.005538461538461539, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007916816690814732, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2181012658227848, "calib/step_q_c_n": 553.0, "calib/step_q_gap": 0.013298587729587141, "calib/step_q_w": 0.20480267809319766, "calib/step_q_w_n": 3734.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2536.0, "completions/max_terminated_length": 2536.0, "completions/mean_length": 537.6484375, "completions/mean_terminated_length": 557.2388916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.096, "grad_norm": 0.004481129348278046, "learning_rate": 3.055555555555556e-06, "loss": 0.0262, "num_tokens": 19328909.0, "reward": 0.9621806144714355, "reward_std": 0.10468724370002747, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.7642050981521606, "rewards/format_reward_step": 0.9609375, "step": 90 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.783676678314805, "aux_distill/mean_u": 0.2469408792428226, "aux_distill/n_active_tok": 464.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5252176139272914, "calib/avg_num_step_conf": 15.4453125, "calib/ece": 0.2402788844621514, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 5.461682881037662e-05, "calib/mean_conf": 0.0067330677290836655, "calib/mu_c": 0.006774193548387097, "calib/mu_w": 0.006719576719576721, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008014209377325996, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22713694267515924, "calib/step_q_c_n": 785.0, "calib/step_q_gap": -0.005854758176844527, "calib/step_q_w": 0.23299170085200377, "calib/step_q_w_n": 3169.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2617.0, "completions/max_terminated_length": 2617.0, "completions/mean_length": 543.29296875, "completions/mean_terminated_length": 551.9166870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.09706666666666666, "grad_norm": 0.0064388904720544815, "learning_rate": 3.0277777777777776e-06, "loss": 0.1083, "num_tokens": 19575704.0, "reward": 0.9781495332717896, "reward_std": 0.06891413033008575, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7375491857528687, "rewards/format_reward_step": 0.9765625, "step": 91 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7937708795070648, "aux_distill/mean_u": 0.25537118199335607, "aux_distill/n_active_tok": 453.75, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5101160862354893, "calib/avg_num_step_conf": 16.390625, "calib/ece": 0.17495934959349593, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00656716417910448, "calib/mean_conf": 0.007967479674796748, "calib/mu_c": 0.013333333333333336, "calib/mu_w": 0.006766169154228856, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.020756401680472734, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2312367088607595, "calib/step_q_c_n": 474.0, "calib/step_q_gap": 0.03461878838789548, "calib/step_q_w": 0.19661792047286403, "calib/step_q_w_n": 3722.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2862.0, "completions/max_terminated_length": 2862.0, "completions/mean_length": 502.58203125, "completions/mean_terminated_length": 520.894775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.09813333333333334, "grad_norm": 0.005982478614896536, "learning_rate": 3e-06, "loss": 0.0326, "num_tokens": 19811085.0, "reward": 0.9591376781463623, "reward_std": 0.11132695525884628, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.7854628562927246, "rewards/format_reward_step": 0.95703125, "step": 92 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8032534569501877, "aux_distill/mean_u": 0.26527682188902096, "aux_distill/n_active_tok": 496.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5151528013582343, "calib/avg_num_step_conf": 16.046875, "calib/ece": 0.2391825396825397, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000724278438030559, "calib/mean_conf": 0.007642857142857143, "calib/mu_c": 0.007096774193548388, "calib/mu_w": 0.007821052631578947, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0003968253968253968, "calib/std_conf": 0.010570221938858933, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24839263657957242, "calib/step_q_c_n": 842.0, "calib/step_q_gap": -0.004466824534940711, "calib/step_q_w": 0.25285946111451313, "calib/step_q_w_n": 3266.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2701.0, "completions/max_terminated_length": 2701.0, "completions/mean_length": 580.0625, "completions/mean_terminated_length": 584.6299438476562, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.0992, "grad_norm": 0.00656322855502367, "learning_rate": 2.9722222222222225e-06, "loss": 0.0868, "num_tokens": 20065357.0, "reward": 0.9761672019958496, "reward_std": 0.06974552571773529, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7374906539916992, "rewards/format_reward_step": 0.97265625, "step": 93 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7575046606361866, "aux_distill/mean_u": 0.23041620430958304, "aux_distill/n_active_tok": 437.75, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49822957839262183, "calib/avg_num_step_conf": 14.8671875, "calib/ece": 0.25604, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003162055335968377, "calib/mean_conf": 0.00796, "calib/mu_c": 0.007727272727272728, "calib/mu_w": 0.008043478260869565, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007812707597241817, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28975, "calib/step_q_c_n": 760.0, "calib/step_q_gap": 0.060996979645436655, "calib/step_q_w": 0.22875302035456335, "calib/step_q_w_n": 3046.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2178.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 499.359375, "completions/mean_terminated_length": 509.3067932128906, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.10026666666666667, "grad_norm": 0.005905228201299906, "learning_rate": 2.944444444444445e-06, "loss": 0.0575, "num_tokens": 20301873.0, "reward": 0.9745877981185913, "reward_std": 0.08027815073728561, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7187070250511169, "rewards/format_reward_step": 0.97265625, "step": 94 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8056490551680326, "aux_distill/mean_u": 0.3068893133746946, "aux_distill/n_active_tok": 406.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46144943730545135, "calib/avg_num_step_conf": 13.46875, "calib/ece": 0.25535433070866137, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001104637241599489, "calib/mean_conf": 0.008425196850393703, "calib/mu_c": 0.007611940298507463, "calib/mu_w": 0.008716577540106952, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009257913988471854, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26467741935483874, "calib/step_q_c_n": 620.0, "calib/step_q_gap": 0.011791492905050882, "calib/step_q_w": 0.25288592644978786, "calib/step_q_w_n": 2828.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2769.0, "completions/max_terminated_length": 2769.0, "completions/mean_length": 467.4765625, "completions/mean_terminated_length": 471.157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.10133333333333333, "grad_norm": 0.007395831868052483, "learning_rate": 2.916666666666667e-06, "loss": 0.0508, "num_tokens": 20527675.0, "reward": 0.9941019415855408, "reward_std": 0.025779156014323235, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7342976331710815, "rewards/format_reward_step": 0.9921875, "step": 95 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.800974678248167, "aux_distill/mean_u": 0.28526942786374343, "aux_distill/n_active_tok": 428.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5111559139784947, "calib/avg_num_step_conf": 13.9921875, "calib/ece": 0.3588537549407115, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00012970430107526847, "calib/mean_conf": 0.008735177865612648, "calib/mu_c": 0.008817204301075268, "calib/mu_w": 0.008687499999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00824411562022684, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2628656374501992, "calib/step_q_c_n": 1004.0, "calib/step_q_gap": 0.021554543578981233, "calib/step_q_w": 0.24131109387121796, "calib/step_q_w_n": 2578.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2726.0, "completions/max_terminated_length": 2726.0, "completions/mean_length": 481.1796875, "completions/mean_terminated_length": 486.8854064941406, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.1024, "grad_norm": 0.005970444995909929, "learning_rate": 2.888888888888889e-06, "loss": 0.0611, "num_tokens": 20756673.0, "reward": 0.9836015105247498, "reward_std": 0.0583636611700058, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6234530806541443, "rewards/format_reward_step": 0.98046875, "step": 96 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8248364944010973, "aux_distill/mean_u": 0.27616341024908203, "aux_distill/n_active_tok": 411.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48250511818351016, "calib/avg_num_step_conf": 13.3828125, "calib/ece": 0.20478260869565215, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011166945840312666, "calib/mean_conf": 0.00865612648221344, "calib/mu_c": 0.007777777777777778, "calib/mu_w": 0.008894472361809045, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010357843069824, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24534420289855075, "calib/step_q_c_n": 552.0, "calib/step_q_gap": -0.014249499258721376, "calib/step_q_w": 0.2595937021572721, "calib/step_q_w_n": 2874.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1860.0, "completions/max_terminated_length": 1860.0, "completions/mean_length": 468.1328125, "completions/mean_terminated_length": 471.81890869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.10346666666666667, "grad_norm": 0.005742794368416071, "learning_rate": 2.861111111111111e-06, "loss": 0.067, "num_tokens": 20981587.0, "reward": 0.985925555229187, "reward_std": 0.04699475318193436, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.776538610458374, "rewards/format_reward_step": 0.984375, "step": 97 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8426261711865664, "aux_distill/mean_u": 0.3216190219877432, "aux_distill/n_active_tok": 404.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5656523357528382, "calib/avg_num_step_conf": 13.57421875, "calib/ece": 0.2075494071146245, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001636888144425832, "calib/mean_conf": 0.012213438735177865, "calib/mu_c": 0.010925925925925927, "calib/mu_w": 0.01256281407035176, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0031620553359683794, "calib/std_conf": 0.0381053498214233, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2518213058419244, "calib/step_q_c_n": 582.0, "calib/step_q_gap": -0.015493094572869903, "calib/step_q_w": 0.2673144004147943, "calib/step_q_w_n": 2893.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1944.0, "completions/max_terminated_length": 1944.0, "completions/mean_length": 462.1640625, "completions/mean_terminated_length": 467.644287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.10453333333333334, "grad_norm": 0.0061998069286346436, "learning_rate": 2.8333333333333335e-06, "loss": 0.0432, "num_tokens": 21206085.0, "reward": 0.9897946715354919, "reward_std": 0.03887510299682617, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7803707122802734, "rewards/format_reward_step": 0.98828125, "step": 98 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7924968916922808, "aux_distill/mean_u": 0.26154136502969894, "aux_distill/n_active_tok": 433.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5699200913242009, "calib/avg_num_step_conf": 13.8828125, "calib/ece": 0.11856573705179282, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008319063926940633, "calib/mean_conf": 0.010836653386454183, "calib/mu_c": 0.0115625, "calib/mu_w": 0.010730593607305937, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009561752988047808, "calib/std_conf": 0.01761974431133978, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2841832669322709, "calib/step_q_c_n": 251.0, "calib/step_q_gap": -0.01624967281946993, "calib/step_q_w": 0.30043293975174085, "calib/step_q_w_n": 3303.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2834.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 512.203125, "completions/mean_terminated_length": 514.2117919921875, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.1056, "grad_norm": 0.005434286780655384, "learning_rate": 2.805555555555556e-06, "loss": 0.1352, "num_tokens": 21443009.0, "reward": 0.9817042350769043, "reward_std": 0.057376354932785034, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.8579398393630981, "rewards/format_reward_step": 0.98046875, "step": 99 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8403134858235717, "aux_distill/mean_u": 0.304650107976595, "aux_distill/n_active_tok": 362.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4799983638743455, "calib/avg_num_step_conf": 11.4609375, "calib/ece": 0.24011764705882355, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00105693717277487, "calib/mean_conf": 0.012666666666666668, "calib/mu_c": 0.011875, "calib/mu_w": 0.01293193717277487, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009019607843137256, "calib/std_conf": 0.023194545223059995, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2982043343653251, "calib/step_q_c_n": 646.0, "calib/step_q_gap": 0.013900397357451133, "calib/step_q_w": 0.284303937007874, "calib/step_q_w_n": 2286.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1673.0, "completions/max_terminated_length": 1673.0, "completions/mean_length": 432.72265625, "completions/mean_terminated_length": 434.4196472167969, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.10666666666666667, "grad_norm": 0.007272300310432911, "learning_rate": 2.7777777777777783e-06, "loss": 0.0793, "num_tokens": 21661194.0, "reward": 0.9948787689208984, "reward_std": 0.020361021161079407, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.747570276260376, "rewards/format_reward_step": 0.9921875, "step": 100 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.820907037705183, "aux_distill/mean_u": 0.2527867314052802, "aux_distill/n_active_tok": 374.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4725410341848698, "calib/avg_num_step_conf": 11.85546875, "calib/ece": 0.1340234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012254720473898542, "calib/mean_conf": 0.010507812500000002, "calib/mu_c": 0.00945945945945946, "calib/mu_w": 0.010684931506849314, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011221140827244071, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2470157068062827, "calib/step_q_c_n": 382.0, "calib/step_q_gap": -0.00756171498037389, "calib/step_q_w": 0.2545774217866566, "calib/step_q_w_n": 2653.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1342.0, "completions/max_terminated_length": 1342.0, "completions/mean_length": 440.0703125, "completions/mean_terminated_length": 441.7961120605469, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.10773333333333333, "grad_norm": 0.0064960652962327, "learning_rate": 2.7500000000000004e-06, "loss": 0.0664, "num_tokens": 21880844.0, "reward": 0.9973434805870056, "reward_std": 0.013668889179825783, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.8540620803833008, "rewards/format_reward_step": 0.99609375, "step": 101 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7962197810411453, "aux_distill/mean_u": 0.2730796349751205, "aux_distill/n_active_tok": 331.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5503753753753755, "calib/avg_num_step_conf": 10.765625, "calib/ece": 0.28129921259842516, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004399399399399387, "calib/mean_conf": 0.011850393700787403, "calib/mu_c": 0.012162162162162163, "calib/mu_w": 0.011722222222222224, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009055118110236221, "calib/std_conf": 0.017838990623327684, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2971952554744526, "calib/step_q_c_n": 548.0, "calib/step_q_gap": -0.0031182861922141036, "calib/step_q_w": 0.3003135416666667, "calib/step_q_w_n": 2208.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 389.7109375, "completions/mean_terminated_length": 392.779541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.1088, "grad_norm": 0.006796353962272406, "learning_rate": 2.7222222222222224e-06, "loss": 0.062, "num_tokens": 22087306.0, "reward": 0.9954755902290344, "reward_std": 0.027535689994692802, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7097011804580688, "rewards/format_reward_step": 0.9921875, "step": 102 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8647203091531992, "aux_distill/mean_u": 0.29944187739858286, "aux_distill/n_active_tok": 331.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.527927927927928, "calib/avg_num_step_conf": 10.58984375, "calib/ece": 0.28141732283464566, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006831831831831826, "calib/mean_conf": 0.009921259842519688, "calib/mu_c": 0.010405405405405405, "calib/mu_w": 0.009722222222222222, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009306340484634464, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24324257274119448, "calib/step_q_c_n": 653.0, "calib/step_q_gap": 0.026392718513789243, "calib/step_q_w": 0.21684985422740524, "calib/step_q_w_n": 2058.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1834.0, "completions/max_terminated_length": 1834.0, "completions/mean_length": 404.06640625, "completions/mean_terminated_length": 405.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.10986666666666667, "grad_norm": 0.00689524132758379, "learning_rate": 2.6944444444444444e-06, "loss": 0.062, "num_tokens": 22295299.0, "reward": 0.9911972284317017, "reward_std": 0.037496358156204224, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7050508260726929, "rewards/format_reward_step": 0.98828125, "step": 103 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8144241627305746, "aux_distill/mean_u": 0.2616677400131344, "aux_distill/n_active_tok": 343.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5434120335110435, "calib/avg_num_step_conf": 10.99609375, "calib/ece": 0.19433070866141733, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009558263518659564, "calib/mean_conf": 0.010393700787401573, "calib/mu_c": 0.011153846153846155, "calib/mu_w": 0.010198019801980198, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008685364176078153, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.275157399103139, "calib/step_q_c_n": 446.0, "calib/step_q_gap": 0.06505085625805673, "calib/step_q_w": 0.2101065428450823, "calib/step_q_w_n": 2369.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2965.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 419.90234375, "completions/mean_terminated_length": 421.5490417480469, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.11093333333333333, "grad_norm": 0.006455368362367153, "learning_rate": 2.666666666666667e-06, "loss": 0.1269, "num_tokens": 22509474.0, "reward": 0.9943621158599854, "reward_std": 0.025546476244926453, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.7934117317199707, "rewards/format_reward_step": 0.9921875, "step": 104 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8212667172774673, "aux_distill/mean_u": 0.2547904974645424, "aux_distill/n_active_tok": 321.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5416195324283559, "calib/avg_num_step_conf": 10.12109375, "calib/ece": 0.197734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.003054298642533936, "calib/mean_conf": 0.013203125000000001, "calib/mu_c": 0.01076923076923077, "calib/mu_w": 0.013823529411764707, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00390625, "calib/std_conf": 0.06247416458212447, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25754342431761784, "calib/step_q_c_n": 403.0, "calib/step_q_gap": 0.007443195798422231, "calib/step_q_w": 0.2501002285191956, "calib/step_q_w_n": 2188.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1128.0, "completions/max_terminated_length": 1128.0, "completions/mean_length": 380.5625, "completions/mean_terminated_length": 382.054931640625, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.112, "grad_norm": 0.00898874830454588, "learning_rate": 2.6388888888888893e-06, "loss": 0.0861, "num_tokens": 22712658.0, "reward": 1.0001487731933594, "reward_std": 0.009183384478092194, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.7971726059913635, "rewards/format_reward_step": 1.0, "step": 105 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8393920790404081, "aux_distill/mean_u": 0.2885217277018143, "aux_distill/n_active_tok": 308.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5007281553398057, "calib/avg_num_step_conf": 9.7421875, "calib/ece": 0.1848828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -3.689320388349307e-05, "calib/mean_conf": 0.0104296875, "calib/mu_c": 0.010400000000000003, "calib/mu_w": 0.010436893203883496, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009239168991437691, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2901785714285714, "calib/step_q_c_n": 392.0, "calib/step_q_gap": 0.009724099497077587, "calib/step_q_w": 0.2804544719314938, "calib/step_q_w_n": 2102.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1085.0, "completions/max_terminated_length": 1085.0, "completions/mean_length": 375.60546875, "completions/mean_terminated_length": 377.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.11306666666666666, "grad_norm": 0.00921398214995861, "learning_rate": 2.6111111111111113e-06, "loss": 0.105, "num_tokens": 22913397.0, "reward": 1.0019341707229614, "reward_std": 0.00327928620390594, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8085559010505676, "rewards/format_reward_step": 1.0, "step": 106 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8549458589404821, "aux_distill/mean_u": 0.2930654655301045, "aux_distill/n_active_tok": 320.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5131324442634633, "calib/avg_num_step_conf": 10.0703125, "calib/ece": 0.17445312500000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002606128473989621, "calib/mean_conf": 0.01, "calib/mu_c": 0.01021276595744681, "calib/mu_w": 0.009952153110047848, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004296875, "calib/std_conf": 0.010307764064044152, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31333333333333335, "calib/step_q_c_n": 453.0, "calib/step_q_gap": 0.06561130980392157, "calib/step_q_w": 0.24772202352941178, "calib/step_q_w_n": 2125.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1170.0, "completions/max_terminated_length": 1170.0, "completions/mean_length": 376.2109375, "completions/mean_terminated_length": 377.6863098144531, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.11413333333333334, "grad_norm": 0.01009163074195385, "learning_rate": 2.5833333333333337e-06, "loss": 0.0553, "num_tokens": 23114323.0, "reward": 1.0017719268798828, "reward_std": 0.0037272428162395954, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8199499845504761, "rewards/format_reward_step": 1.0, "step": 107 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.826457666233182, "aux_distill/mean_u": 0.25843460005882996, "aux_distill/n_active_tok": 287.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5170957918050941, "calib/avg_num_step_conf": 9.03125, "calib/ece": 0.3167578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.136212624584819e-05, "calib/mean_conf": 0.0113671875, "calib/mu_c": 0.01142857142857143, "calib/mu_w": 0.011337209302325582, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009646381359859445, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30450741839762613, "calib/step_q_c_n": 674.0, "calib/step_q_gap": 0.0063376992279069455, "calib/step_q_w": 0.2981697191697192, "calib/step_q_w_n": 1638.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 851.0, "completions/max_terminated_length": 851.0, "completions/mean_length": 349.85546875, "completions/mean_terminated_length": 351.22747802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.1152, "grad_norm": 0.008953594602644444, "learning_rate": 2.5555555555555557e-06, "loss": 0.1039, "num_tokens": 23307118.0, "reward": 1.003638744354248, "reward_std": 0.005494957789778709, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6791527271270752, "rewards/format_reward_step": 1.0, "step": 108 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8436379954218864, "aux_distill/mean_u": 0.25006158266790723, "aux_distill/n_active_tok": 290.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.501933585540143, "calib/avg_num_step_conf": 9.08984375, "calib/ece": 0.2273046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.247583018078681e-06, "calib/mean_conf": 0.0109765625, "calib/mu_c": 0.01098360655737705, "calib/mu_w": 0.010974358974358972, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009282009517534107, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28361872146118716, "calib/step_q_c_n": 438.0, "calib/step_q_gap": 0.008033491180615415, "calib/step_q_w": 0.27558523028057175, "calib/step_q_w_n": 1889.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 962.0, "completions/max_terminated_length": 962.0, "completions/mean_length": 350.03515625, "completions/mean_terminated_length": 351.4078674316406, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.11626666666666667, "grad_norm": 0.00752226123586297, "learning_rate": 2.5277777777777778e-06, "loss": 0.1031, "num_tokens": 23501327.0, "reward": 1.0025138854980469, "reward_std": 0.003788697300478816, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.766746461391449, "rewards/format_reward_step": 1.0, "step": 109 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8580736331641674, "aux_distill/mean_u": 0.24811984237804047, "aux_distill/n_active_tok": 255.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48946814112690895, "calib/avg_num_step_conf": 8.02734375, "calib/ece": 0.1649609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00045602948920484346, "calib/mean_conf": 0.010820312500000002, "calib/mu_c": 0.010444444444444445, "calib/mu_w": 0.010900473933649289, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008689013891250475, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2304778156996587, "calib/step_q_c_n": 293.0, "calib/step_q_gap": 0.00432571581316607, "calib/step_q_w": 0.22615209988649262, "calib/step_q_w_n": 1762.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1089.0, "completions/max_terminated_length": 1089.0, "completions/mean_length": 326.859375, "completions/mean_terminated_length": 328.1412048339844, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.11733333333333333, "grad_norm": 0.00968889519572258, "learning_rate": 2.5e-06, "loss": 0.079, "num_tokens": 23689923.0, "reward": 1.0017396211624146, "reward_std": 0.0027383025735616684, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8276980519294739, "rewards/format_reward_step": 1.0, "step": 110 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8748779203742743, "aux_distill/mean_u": 0.2795663521799445, "aux_distill/n_active_tok": 249.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5199227799227799, "calib/avg_num_step_conf": 8.17578125, "calib/ece": 0.263843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008262548262548269, "calib/mean_conf": 0.01231372549019608, "calib/mu_c": 0.011714285714285715, "calib/mu_w": 0.012540540540540542, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0008235294117647058, "calib/std_conf": 0.014916092370688742, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33807439824945296, "calib/step_q_c_n": 457.0, "calib/step_q_gap": 0.03397293125678802, "calib/step_q_w": 0.30410146699266494, "calib/step_q_w_n": 1636.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2087.0, "completions/max_terminated_length": 2087.0, "completions/mean_length": 326.27734375, "completions/mean_terminated_length": 327.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.1184, "grad_norm": 0.00978804100304842, "learning_rate": 2.4722222222222226e-06, "loss": 0.0978, "num_tokens": 23880858.0, "reward": 0.9991105198860168, "reward_std": 0.015512233600020409, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7286898493766785, "rewards/format_reward_step": 0.99609375, "step": 111 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8446307703852654, "aux_distill/mean_u": 0.278147945651484, "aux_distill/n_active_tok": 252.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4851499423298732, "calib/avg_num_step_conf": 7.890625, "calib/ece": 0.19054901960784315, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001470588235294116, "calib/mean_conf": 0.011568627450980393, "calib/mu_c": 0.010392156862745099, "calib/mu_w": 0.011862745098039215, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010588235294117648, "calib/std_conf": 0.01792933285282756, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28216374269005845, "calib/step_q_c_n": 342.0, "calib/step_q_gap": 0.03338179990102391, "calib/step_q_w": 0.24878194278903454, "calib/step_q_w_n": 1678.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1947.0, "completions/max_terminated_length": 1947.0, "completions/mean_length": 327.9296875, "completions/mean_terminated_length": 327.9296875, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.11946666666666667, "grad_norm": 0.009700233116745949, "learning_rate": 2.4444444444444447e-06, "loss": 0.1246, "num_tokens": 24072728.0, "reward": 0.9979373216629028, "reward_std": 0.014979146420955658, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.8005621433258057, "rewards/format_reward_step": 0.99609375, "step": 112 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8938411138951778, "aux_distill/mean_u": 0.27519478460141256, "aux_distill/n_active_tok": 228.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4992364857986359, "calib/avg_num_step_conf": 7.3125, "calib/ece": 0.1726953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -5.8027079303675164e-05, "calib/mean_conf": 0.0108984375, "calib/mu_c": 0.010851063829787235, "calib/mu_w": 0.01090909090909091, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007928725311082088, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24974921630094044, "calib/step_q_c_n": 319.0, "calib/step_q_gap": -0.016225413447932657, "calib/step_q_w": 0.2659746297488731, "calib/step_q_w_n": 1553.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 926.0, "completions/max_terminated_length": 926.0, "completions/mean_length": 276.69921875, "completions/mean_terminated_length": 277.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.12053333333333334, "grad_norm": 0.011297706514596939, "learning_rate": 2.4166666666666667e-06, "loss": 0.0632, "num_tokens": 24248763.0, "reward": 1.0019012689590454, "reward_std": 0.003506625071167946, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8202089667320251, "rewards/format_reward_step": 1.0, "step": 113 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8577989656478167, "aux_distill/mean_u": 0.24188379755471998, "aux_distill/n_active_tok": 210.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.534063701661467, "calib/avg_num_step_conf": 6.75390625, "calib/ece": 0.2883203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00019516796053108826, "calib/mean_conf": 0.012460937500000002, "calib/mu_c": 0.012597402597402598, "calib/mu_w": 0.01240223463687151, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008088767620663469, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2853211009174312, "calib/step_q_c_n": 436.0, "calib/step_q_gap": 0.02676093077048608, "calib/step_q_w": 0.2585601701469451, "calib/step_q_w_n": 1293.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1168.0, "completions/max_terminated_length": 1168.0, "completions/mean_length": 273.59765625, "completions/mean_terminated_length": 274.67059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.1216, "grad_norm": 0.010183519683778286, "learning_rate": 2.388888888888889e-06, "loss": 0.0621, "num_tokens": 24423828.0, "reward": 1.0017255544662476, "reward_std": 0.010000040754675865, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7065761685371399, "rewards/format_reward_step": 0.99609375, "step": 114 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8538937643170357, "aux_distill/mean_u": 0.22364562285076225, "aux_distill/n_active_tok": 222.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5282455603184323, "calib/avg_num_step_conf": 7.2578125, "calib/ece": 0.2669019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015890998162890382, "calib/mean_conf": 0.011529411764705884, "calib/mu_c": 0.01267605633802817, "calib/mu_w": 0.011086956521739131, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008044856939895886, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3118958333333334, "calib/step_q_c_n": 480.0, "calib/step_q_gap": 0.02730221940009686, "calib/step_q_w": 0.28459361393323657, "calib/step_q_w_n": 1378.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2678.0, "completions/max_terminated_length": 2678.0, "completions/mean_length": 286.85546875, "completions/mean_terminated_length": 287.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.12266666666666666, "grad_norm": 0.010189507156610489, "learning_rate": 2.361111111111111e-06, "loss": 0.0982, "num_tokens": 24602527.0, "reward": 0.9995108842849731, "reward_std": 0.016104616224765778, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7255843877792358, "rewards/format_reward_step": 0.99609375, "step": 115 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8547423658892512, "aux_distill/mean_u": 0.24822352620147983, "aux_distill/n_active_tok": 233.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5182397959183673, "calib/avg_num_step_conf": 7.3671875, "calib/ece": 0.22429687499999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011020408163265275, "calib/mean_conf": 0.01234375, "calib/mu_c": 0.011500000000000002, "calib/mu_w": 0.012602040816326529, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0011328125, "calib/std_conf": 0.019303674156426802, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24276649746192894, "calib/step_q_c_n": 394.0, "calib/step_q_gap": 0.007699473333242585, "calib/step_q_w": 0.23506702412868635, "calib/step_q_w_n": 1492.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 301.54296875, "completions/mean_terminated_length": 302.7254943847656, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.12373333333333333, "grad_norm": 0.009150901809334755, "learning_rate": 2.3333333333333336e-06, "loss": 0.1132, "num_tokens": 24784242.0, "reward": 1.0024328231811523, "reward_std": 0.0038933088071644306, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.7704906463623047, "rewards/format_reward_step": 1.0, "step": 116 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8299000021070242, "aux_distill/mean_u": 0.21773052726680453, "aux_distill/n_active_tok": 250.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4671406003159558, "calib/avg_num_step_conf": 7.89453125, "calib/ece": 0.1650390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011700895208004176, "calib/mean_conf": 0.0107421875, "calib/mu_c": 0.00977777777777778, "calib/mu_w": 0.010947867298578197, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007436970667875715, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26386627906976745, "calib/step_q_c_n": 344.0, "calib/step_q_gap": 0.023079159212880146, "calib/step_q_w": 0.2407871198568873, "calib/step_q_w_n": 1677.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 991.0, "completions/max_terminated_length": 991.0, "completions/mean_length": 310.97265625, "completions/mean_terminated_length": 312.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.1248, "grad_norm": 0.008661650121212006, "learning_rate": 2.305555555555556e-06, "loss": 0.1199, "num_tokens": 24970451.0, "reward": 0.9977273344993591, "reward_std": 0.013147260062396526, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8235796689987183, "rewards/format_reward_step": 0.99609375, "step": 117 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8391965217888355, "aux_distill/mean_u": 0.1979998792369977, "aux_distill/n_active_tok": 211.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5883572567783094, "calib/avg_num_step_conf": 6.74609375, "calib/ece": 0.24605468749999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004776714513556619, "calib/mean_conf": 0.011757812500000001, "calib/mu_c": 0.015303030303030303, "calib/mu_w": 0.010526315789473684, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012264816354713338, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20689156626506025, "calib/step_q_c_n": 415.0, "calib/step_q_gap": -0.04102001910079339, "calib/step_q_w": 0.24791158536585364, "calib/step_q_w_n": 1312.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 887.0, "completions/max_terminated_length": 887.0, "completions/mean_length": 271.43359375, "completions/mean_terminated_length": 272.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.12586666666666665, "grad_norm": 0.011321449652314186, "learning_rate": 2.277777777777778e-06, "loss": 0.0597, "num_tokens": 25143946.0, "reward": 1.003800868988037, "reward_std": 0.005998665001243353, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7497894763946533, "rewards/format_reward_step": 1.0, "step": 118 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9129964718595147, "aux_distill/mean_u": 0.28269734278456216, "aux_distill/n_active_tok": 216.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5326227636086792, "calib/avg_num_step_conf": 6.8359375, "calib/ece": 0.2666015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011168633422154566, "calib/mean_conf": 0.0107421875, "calib/mu_c": 0.011549295774647889, "calib/mu_w": 0.010432432432432432, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0065428229927794734, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25570806100217863, "calib/step_q_c_n": 459.0, "calib/step_q_gap": 0.028099230638119782, "calib/step_q_w": 0.22760883036405885, "calib/step_q_w_n": 1291.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 850.0, "completions/max_terminated_length": 850.0, "completions/mean_length": 276.4296875, "completions/mean_terminated_length": 277.51373291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.12693333333333334, "grad_norm": 0.012318076565861702, "learning_rate": 2.25e-06, "loss": 0.0955, "num_tokens": 25319776.0, "reward": 0.9992179274559021, "reward_std": 0.015154972672462463, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7249984741210938, "rewards/format_reward_step": 0.99609375, "step": 119 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8805275009945035, "aux_distill/mean_u": 0.2454018055368668, "aux_distill/n_active_tok": 206.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5360638210379143, "calib/avg_num_step_conf": 6.48046875, "calib/ece": 0.234609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013997861666255433, "calib/mean_conf": 0.011484375, "calib/mu_c": 0.01253968253968254, "calib/mu_w": 0.011139896373056997, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008205356839246846, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22673239436619716, "calib/step_q_c_n": 355.0, "calib/step_q_gap": 0.002174879028773863, "calib/step_q_w": 0.2245575153374233, "calib/step_q_w_n": 1304.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1198.0, "completions/max_terminated_length": 1198.0, "completions/mean_length": 265.234375, "completions/mean_terminated_length": 266.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.128, "grad_norm": 0.010012347251176834, "learning_rate": 2.222222222222222e-06, "loss": 0.0672, "num_tokens": 25494364.0, "reward": 1.0029863119125366, "reward_std": 0.003938643261790276, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.759878933429718, "rewards/format_reward_step": 1.0, "step": 120 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8603787571191788, "aux_distill/mean_u": 0.23333242359569997, "aux_distill/n_active_tok": 240.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.514964216005205, "calib/avg_num_step_conf": 7.625, "calib/ece": 0.1973046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003448275862068962, "calib/mean_conf": 0.0097265625, "calib/mu_c": 0.01, "calib/mu_w": 0.009655172413793104, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007572787263194032, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17681052631578947, "calib/step_q_c_n": 342.0, "calib/step_q_gap": -0.02709630598234719, "calib/step_q_w": 0.20390683229813666, "calib/step_q_w_n": 1610.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 897.0, "completions/max_terminated_length": 897.0, "completions/mean_length": 294.94140625, "completions/mean_terminated_length": 296.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.12906666666666666, "grad_norm": 0.010293123312294483, "learning_rate": 2.1944444444444445e-06, "loss": 0.0489, "num_tokens": 25674925.0, "reward": 1.0019943714141846, "reward_std": 0.0032473872415721416, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7969574332237244, "rewards/format_reward_step": 1.0, "step": 121 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.908028569072485, "aux_distill/mean_u": 0.2910248312232852, "aux_distill/n_active_tok": 231.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5142921754862053, "calib/avg_num_step_conf": 7.296875, "calib/ece": 0.2037890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002777023971053845, "calib/mean_conf": 0.0110546875, "calib/mu_c": 0.011272727272727275, "calib/mu_w": 0.01099502487562189, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007451728610016856, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23934999999999998, "calib/step_q_c_n": 400.0, "calib/step_q_gap": 0.024211852861035388, "calib/step_q_w": 0.2151381471389646, "calib/step_q_w_n": 1468.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 885.0, "completions/max_terminated_length": 885.0, "completions/mean_length": 283.5390625, "completions/mean_terminated_length": 284.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.13013333333333332, "grad_norm": 0.011241834610700607, "learning_rate": 2.166666666666667e-06, "loss": 0.0677, "num_tokens": 25854855.0, "reward": 0.9945212602615356, "reward_std": 0.025330815464258194, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.7820113301277161, "rewards/format_reward_step": 0.9921875, "step": 122 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8685710206627846, "aux_distill/mean_u": 0.277867029052038, "aux_distill/n_active_tok": 246.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5124842370744009, "calib/avg_num_step_conf": 7.74609375, "calib/ece": 0.227421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00026733921815888864, "calib/mean_conf": 0.010859375000000001, "calib/mu_c": 0.010655737704918034, "calib/mu_w": 0.010923076923076923, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007957636245102876, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22950338600451467, "calib/step_q_c_n": 443.0, "calib/step_q_gap": -0.003756354255225608, "calib/step_q_w": 0.23325974025974028, "calib/step_q_w_n": 1540.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1194.0, "completions/max_terminated_length": 1194.0, "completions/mean_length": 321.84765625, "completions/mean_terminated_length": 323.1098327636719, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.1312, "grad_norm": 0.009935425594449043, "learning_rate": 2.138888888888889e-06, "loss": 0.0973, "num_tokens": 26042536.0, "reward": 1.0024484395980835, "reward_std": 0.0032734833657741547, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.766615629196167, "rewards/format_reward_step": 1.0, "step": 123 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8649204336106777, "aux_distill/mean_u": 0.24206068871074204, "aux_distill/n_active_tok": 224.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5431367711160457, "calib/avg_num_step_conf": 7.0234375, "calib/ece": 0.2354296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010140636565507035, "calib/mean_conf": 0.0106640625, "calib/mu_c": 0.011428571428571429, "calib/mu_w": 0.010414507772020725, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007122562459964373, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26356481481481486, "calib/step_q_c_n": 432.0, "calib/step_q_gap": 0.02929834336532733, "calib/step_q_w": 0.23426647144948753, "calib/step_q_w_n": 1366.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 768.0, "completions/max_terminated_length": 768.0, "completions/mean_length": 281.875, "completions/mean_terminated_length": 282.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.13226666666666667, "grad_norm": 0.012049200013279915, "learning_rate": 2.1111111111111114e-06, "loss": 0.103, "num_tokens": 26221512.0, "reward": 1.0027302503585815, "reward_std": 0.0034669036976993084, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7593668103218079, "rewards/format_reward_step": 1.0, "step": 124 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.830241059884429, "aux_distill/mean_u": 0.21613227087091394, "aux_distill/n_active_tok": 228.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46915446020305357, "calib/avg_num_step_conf": 7.2109375, "calib/ece": 0.2573046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008656901495776182, "calib/mean_conf": 0.0122265625, "calib/mu_c": 0.011594202898550725, "calib/mu_w": 0.012459893048128343, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007559880583289246, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2907912087912088, "calib/step_q_c_n": 455.0, "calib/step_q_gap": 0.03646338708020952, "calib/step_q_w": 0.25432782171099927, "calib/step_q_w_n": 1391.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1133.0, "completions/max_terminated_length": 1133.0, "completions/mean_length": 308.4921875, "completions/mean_terminated_length": 309.70196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.13333333333333333, "grad_norm": 0.009387078694999218, "learning_rate": 2.0833333333333334e-06, "loss": 0.1047, "num_tokens": 26405294.0, "reward": 1.0030217170715332, "reward_std": 0.004005182534456253, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7365121245384216, "rewards/format_reward_step": 1.0, "step": 125 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8431950993835926, "aux_distill/mean_u": 0.2633687595939001, "aux_distill/n_active_tok": 250.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5391230790920626, "calib/avg_num_step_conf": 7.8984375, "calib/ece": 0.3106666666666667, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000827576483857324, "calib/mean_conf": 0.010901960784313727, "calib/mu_c": 0.011463414634146341, "calib/mu_w": 0.010635838150289017, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006535738558744991, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.28768749999999993, "calib/step_q_c_n": 480.0, "calib/step_q_gap": 0.01721947146562902, "calib/step_q_w": 0.2704680285343709, "calib/step_q_w_n": 1542.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 851.0, "completions/max_terminated_length": 851.0, "completions/mean_length": 314.47265625, "completions/mean_terminated_length": 315.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.1344, "grad_norm": 0.011555805802345276, "learning_rate": 2.0555555555555555e-06, "loss": 0.0752, "num_tokens": 26591263.0, "reward": 0.9976931810379028, "reward_std": 0.02042119950056076, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6828863024711609, "rewards/format_reward_step": 0.9921875, "step": 126 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8271699827164412, "aux_distill/mean_u": 0.23429490647847387, "aux_distill/n_active_tok": 241.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5146700294241278, "calib/avg_num_step_conf": 7.6640625, "calib/ece": 0.2266796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00048003362757460984, "calib/mean_conf": 0.011601562499999999, "calib/mu_c": 0.011967213114754098, "calib/mu_w": 0.011487179487179488, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006567961826822211, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2443217665615142, "calib/step_q_c_n": 317.0, "calib/step_q_gap": -0.042304373256115, "calib/step_q_w": 0.2866261398176292, "calib/step_q_w_n": 1645.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 972.0, "completions/max_terminated_length": 972.0, "completions/mean_length": 295.76171875, "completions/mean_terminated_length": 296.9216003417969, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.13546666666666668, "grad_norm": 0.008244852535426617, "learning_rate": 2.027777777777778e-06, "loss": 0.0643, "num_tokens": 26770650.0, "reward": 1.0027626752853394, "reward_std": 0.002931135706603527, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7672441601753235, "rewards/format_reward_step": 1.0, "step": 127 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8970307148993015, "aux_distill/mean_u": 0.2829727085306536, "aux_distill/n_active_tok": 237.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5422732132576692, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.2336328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0017887984209227752, "calib/mean_conf": 0.012460937500000002, "calib/mu_c": 0.013809523809523811, "calib/mu_w": 0.012020725388601036, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008136916591749836, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3202928870292887, "calib/step_q_c_n": 478.0, "calib/step_q_gap": 0.06881493041627262, "calib/step_q_w": 0.2514779566130161, "calib/step_q_w_n": 1429.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 955.0, "completions/max_terminated_length": 955.0, "completions/mean_length": 309.6953125, "completions/mean_terminated_length": 310.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.13653333333333334, "grad_norm": 0.008724896237254143, "learning_rate": 2.0000000000000003e-06, "loss": 0.1234, "num_tokens": 26956596.0, "reward": 1.003287672996521, "reward_std": 0.004546608775854111, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7604816555976868, "rewards/format_reward_step": 1.0, "step": 128 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8841156912967563, "aux_distill/mean_u": 0.2761511465104415, "aux_distill/n_active_tok": 231.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6039580725907384, "calib/avg_num_step_conf": 7.3046875, "calib/ece": 0.2541015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025312891113892354, "calib/mean_conf": 0.0115234375, "calib/mu_c": 0.01338235294117647, "calib/mu_w": 0.010851063829787235, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00788240212014039, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32403940886699506, "calib/step_q_c_n": 406.0, "calib/step_q_gap": 0.06931782416754151, "calib/step_q_w": 0.25472158469945355, "calib/step_q_w_n": 1464.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 943.0, "completions/max_terminated_length": 943.0, "completions/mean_length": 278.8203125, "completions/mean_terminated_length": 279.9137268066406, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.1376, "grad_norm": 0.008412109687924385, "learning_rate": 1.9722222222222224e-06, "loss": 0.1057, "num_tokens": 27130358.0, "reward": 1.0034571886062622, "reward_std": 0.004877117462456226, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7412894368171692, "rewards/format_reward_step": 1.0, "step": 129 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8594199605286121, "aux_distill/mean_u": 0.2653886585378518, "aux_distill/n_active_tok": 261.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5633802816901409, "calib/avg_num_step_conf": 8.16015625, "calib/ece": 0.2662745098039216, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00130511328842621, "calib/mean_conf": 0.01215686274509804, "calib/mu_c": 0.013098591549295775, "calib/mu_w": 0.011793478260869565, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007171218478847034, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.38084444444444443, "calib/step_q_c_n": 450.0, "calib/step_q_gap": 0.11618690936207715, "calib/step_q_w": 0.2646575350823673, "calib/step_q_w_n": 1639.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 321.6640625, "completions/mean_terminated_length": 321.6640625, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.13866666666666666, "grad_norm": 0.012677663937211037, "learning_rate": 1.944444444444445e-06, "loss": 0.0936, "num_tokens": 27317992.0, "reward": 0.9996272921562195, "reward_std": 0.015145928598940372, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7258172035217285, "rewards/format_reward_step": 0.99609375, "step": 130 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.864942922256887, "aux_distill/mean_u": 0.23996074333237236, "aux_distill/n_active_tok": 243.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45621744791666674, "calib/avg_num_step_conf": 7.70703125, "calib/ece": 0.2376171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017187499999999998, "calib/mean_conf": 0.0123828125, "calib/mu_c": 0.01109375, "calib/mu_w": 0.0128125, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008160136615881119, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2754566326530612, "calib/step_q_c_n": 392.0, "calib/step_q_gap": -0.0505262262969704, "calib/step_q_w": 0.3259828589500316, "calib/step_q_w_n": 1581.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1511.0, "completions/max_terminated_length": 1511.0, "completions/mean_length": 291.46875, "completions/mean_terminated_length": 292.6117858886719, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.13973333333333332, "grad_norm": 0.015411583706736565, "learning_rate": 1.916666666666667e-06, "loss": 0.0912, "num_tokens": 27498816.0, "reward": 1.0026633739471436, "reward_std": 0.0034360941499471664, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7553269863128662, "rewards/format_reward_step": 1.0, "step": 131 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8268191730603576, "aux_distill/mean_u": 0.3091863213388308, "aux_distill/n_active_tok": 251.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.524292324442025, "calib/avg_num_step_conf": 7.8671875, "calib/ece": 0.3333686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003264153511159506, "calib/mean_conf": 0.01172941176470588, "calib/mu_c": 0.011943181818181818, "calib/mu_w": 0.011616766467065867, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.007419721841349502, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2572464788732394, "calib/step_q_c_n": 568.0, "calib/step_q_gap": -0.03707578945317824, "calib/step_q_w": 0.29432226832641767, "calib/step_q_w_n": 1446.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 808.0, "completions/max_terminated_length": 808.0, "completions/mean_length": 304.0625, "completions/mean_terminated_length": 305.2549133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.1408, "grad_norm": 0.010893847793340683, "learning_rate": 1.888888888888889e-06, "loss": 0.1174, "num_tokens": 27682248.0, "reward": 1.0001033544540405, "reward_std": 0.015689212828874588, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6603628396987915, "rewards/format_reward_step": 0.99609375, "step": 132 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8875812739133835, "aux_distill/mean_u": 0.2872198548795962, "aux_distill/n_active_tok": 269.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.540341812400636, "calib/avg_num_step_conf": 8.46484375, "calib/ece": 0.121640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010227874933757299, "calib/mean_conf": 0.011171875, "calib/mu_c": 0.012058823529411766, "calib/mu_w": 0.011036036036036036, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006010549807161988, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32817587548638133, "calib/step_q_c_n": 257.0, "calib/step_q_gap": 0.02241974983193107, "calib/step_q_w": 0.30575612565445026, "calib/step_q_w_n": 1910.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1065.0, "completions/max_terminated_length": 1065.0, "completions/mean_length": 330.49609375, "completions/mean_terminated_length": 331.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.14186666666666667, "grad_norm": 0.008630180731415749, "learning_rate": 1.8611111111111113e-06, "loss": 0.0921, "num_tokens": 27873199.0, "reward": 1.001521110534668, "reward_std": 0.0029451206792145967, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8702297210693359, "rewards/format_reward_step": 1.0, "step": 133 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8462215401232243, "aux_distill/mean_u": 0.26503370410630267, "aux_distill/n_active_tok": 284.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.590686274509804, "calib/avg_num_step_conf": 8.921875, "calib/ece": 0.1923046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021078431372549035, "calib/mean_conf": 0.0108203125, "calib/mu_c": 0.012500000000000002, "calib/mu_w": 0.010392156862745099, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0070510788112418486, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.36191326530612244, "calib/step_q_c_n": 392.0, "calib/step_q_gap": 0.09844883613064676, "calib/step_q_w": 0.2634644291754757, "calib/step_q_w_n": 1892.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1043.0, "completions/max_terminated_length": 1043.0, "completions/mean_length": 352.70703125, "completions/mean_terminated_length": 354.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.14293333333333333, "grad_norm": 0.009571945294737816, "learning_rate": 1.8333333333333333e-06, "loss": 0.0882, "num_tokens": 28072444.0, "reward": 1.0024555921554565, "reward_std": 0.0033246742095798254, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.8017863631248474, "rewards/format_reward_step": 1.0, "step": 134 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8613733882084489, "aux_distill/mean_u": 0.2467592106820447, "aux_distill/n_active_tok": 275.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5023255813953489, "calib/avg_num_step_conf": 8.73828125, "calib/ece": 0.1460392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -8.720930232558252e-05, "calib/mean_conf": 0.010823529411764706, "calib/mu_c": 0.01075, "calib/mu_w": 0.010837209302325582, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006664820967111177, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2273584905660377, "calib/step_q_c_n": 318.0, "calib/step_q_gap": -0.062234474519944544, "calib/step_q_w": 0.28959296508598226, "calib/step_q_w_n": 1919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1903.0, "completions/max_terminated_length": 1903.0, "completions/mean_length": 348.3359375, "completions/mean_terminated_length": 349.7019958496094, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.144, "grad_norm": 0.00889371708035469, "learning_rate": 1.8055555555555557e-06, "loss": 0.0565, "num_tokens": 28267498.0, "reward": 0.9917947053909302, "reward_std": 0.0301792211830616, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8390581607818604, "rewards/format_reward_step": 0.98828125, "step": 135 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8540416387841105, "aux_distill/mean_u": 0.2625678658452069, "aux_distill/n_active_tok": 268.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5024567055980669, "calib/avg_num_step_conf": 8.4453125, "calib/ece": 0.242890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 8.215867901731691e-05, "calib/mean_conf": 0.011015625000000001, "calib/mu_c": 0.011076923076923076, "calib/mu_w": 0.01099476439790576, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00556774243831151, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24743778280542988, "calib/step_q_c_n": 442.0, "calib/step_q_gap": 0.0013622014100810598, "calib/step_q_w": 0.24607558139534882, "calib/step_q_w_n": 1720.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1151.0, "completions/max_terminated_length": 1151.0, "completions/mean_length": 331.91796875, "completions/mean_terminated_length": 333.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.14506666666666668, "grad_norm": 0.009111719205975533, "learning_rate": 1.777777777777778e-06, "loss": 0.1004, "num_tokens": 28460957.0, "reward": 0.9988301992416382, "reward_std": 0.014630033634603024, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7476605176925659, "rewards/format_reward_step": 0.99609375, "step": 136 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8544684145599604, "aux_distill/mean_u": 0.27133787342407634, "aux_distill/n_active_tok": 266.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5065359477124183, "calib/avg_num_step_conf": 8.33203125, "calib/ece": 0.18886274509803924, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 4.9019607843137775e-05, "calib/mean_conf": 0.011137254901960783, "calib/mu_c": 0.011176470588235295, "calib/mu_w": 0.011127450980392157, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005931040340721035, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31501305483028724, "calib/step_q_c_n": 383.0, "calib/step_q_gap": 0.06895819768743008, "calib/step_q_w": 0.24605485714285716, "calib/step_q_w_n": 1750.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2025.0, "completions/max_terminated_length": 2025.0, "completions/mean_length": 338.55859375, "completions/mean_terminated_length": 338.55859375, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.14613333333333334, "grad_norm": 0.007216993719339371, "learning_rate": 1.75e-06, "loss": 0.1535, "num_tokens": 28654612.0, "reward": 0.9982409477233887, "reward_std": 0.014324428513646126, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.8011695146560669, "rewards/format_reward_step": 0.99609375, "step": 137 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.823159608989954, "aux_distill/mean_u": 0.21866880539355454, "aux_distill/n_active_tok": 266.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5339099816289039, "calib/avg_num_step_conf": 8.36328125, "calib/ece": 0.26776470588235296, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006376301285976743, "calib/mean_conf": 0.010666666666666668, "calib/mu_c": 0.011126760563380283, "calib/mu_w": 0.010489130434782609, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005525934103155619, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2628949317738791, "calib/step_q_c_n": 513.0, "calib/step_q_gap": 0.0070472659262132975, "calib/step_q_w": 0.25584766584766583, "calib/step_q_w_n": 1628.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1020.0, "completions/max_terminated_length": 1020.0, "completions/mean_length": 317.9453125, "completions/mean_terminated_length": 319.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.1472, "grad_norm": 0.00808706134557724, "learning_rate": 1.7222222222222224e-06, "loss": 0.0917, "num_tokens": 28840342.0, "reward": 0.9952016472816467, "reward_std": 0.025856774300336838, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7208722829818726, "rewards/format_reward_step": 0.9921875, "step": 138 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8427855093032122, "aux_distill/mean_u": 0.22618297184378638, "aux_distill/n_active_tok": 234.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5467355154909855, "calib/avg_num_step_conf": 7.63671875, "calib/ece": 0.22011764705882356, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0030071766147383144, "calib/mean_conf": 0.014392156862745097, "calib/mu_c": 0.01206896551724138, "calib/mu_w": 0.015076142131979694, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0035294117647058825, "calib/std_conf": 0.055838730824167446, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23856725146198832, "calib/step_q_c_n": 342.0, "calib/step_q_gap": -0.00879170700050394, "calib/step_q_w": 0.24735895846249226, "calib/step_q_w_n": 1613.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1879.0, "completions/max_terminated_length": 1879.0, "completions/mean_length": 297.8828125, "completions/mean_terminated_length": 299.0509948730469, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.14826666666666666, "grad_norm": 0.0090831583365798, "learning_rate": 1.6944444444444446e-06, "loss": 0.0901, "num_tokens": 29019696.0, "reward": 0.9971719980239868, "reward_std": 0.019042737782001495, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7716878652572632, "rewards/format_reward_step": 0.99609375, "step": 139 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8314081691205502, "aux_distill/mean_u": 0.21630803803920687, "aux_distill/n_active_tok": 245.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5574928977272727, "calib/avg_num_step_conf": 7.69921875, "calib/ece": 0.301953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012045454545454547, "calib/mean_conf": 0.010546875, "calib/mu_c": 0.011375, "calib/mu_w": 0.010170454545454545, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005124785628138508, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2856230188679245, "calib/step_q_c_n": 530.0, "calib/step_q_gap": 0.05282301886792454, "calib/step_q_w": 0.23279999999999998, "calib/step_q_w_n": 1441.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 807.0, "completions/max_terminated_length": 807.0, "completions/mean_length": 304.4453125, "completions/mean_terminated_length": 305.63922119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.14933333333333335, "grad_norm": 0.012820438481867313, "learning_rate": 1.6666666666666667e-06, "loss": 0.1162, "num_tokens": 29202650.0, "reward": 1.003485918045044, "reward_std": 0.004167159553617239, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6944718956947327, "rewards/format_reward_step": 1.0, "step": 140 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8130811918526888, "aux_distill/mean_u": 0.24643047645682192, "aux_distill/n_active_tok": 256.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5140083745717549, "calib/avg_num_step_conf": 8.08203125, "calib/ece": 0.2669921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002931100114198712, "calib/mean_conf": 0.010351562500000001, "calib/mu_c": 0.01056338028169014, "calib/mu_w": 0.01027027027027027, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005103751444633031, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24803131991051455, "calib/step_q_c_n": 447.0, "calib/step_q_gap": 0.028412331007925146, "calib/step_q_w": 0.2196189889025894, "calib/step_q_w_n": 1622.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1053.0, "completions/max_terminated_length": 1053.0, "completions/mean_length": 345.2109375, "completions/mean_terminated_length": 346.5647277832031, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.1504, "grad_norm": 0.022635893896222115, "learning_rate": 1.638888888888889e-06, "loss": 0.1239, "num_tokens": 29398120.0, "reward": 1.0028630495071411, "reward_std": 0.0038094979245215654, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.728382408618927, "rewards/format_reward_step": 1.0, "step": 141 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.834231823682785, "aux_distill/mean_u": 0.23377564743541748, "aux_distill/n_active_tok": 254.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5165050062578223, "calib/avg_num_step_conf": 8.03125, "calib/ece": 0.2549609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000297246558197746, "calib/mean_conf": 0.0106640625, "calib/mu_c": 0.01088235294117647, "calib/mu_w": 0.010585106382978724, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004671685562630874, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21933649289099527, "calib/step_q_c_n": 422.0, "calib/step_q_gap": -0.0052993700221014295, "calib/step_q_w": 0.2246358629130967, "calib/step_q_w_n": 1634.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1232.0, "completions/max_terminated_length": 1232.0, "completions/mean_length": 331.01953125, "completions/mean_terminated_length": 332.3176574707031, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.15146666666666667, "grad_norm": 0.011043130420148373, "learning_rate": 1.6111111111111113e-06, "loss": 0.0779, "num_tokens": 29588021.0, "reward": 1.002822756767273, "reward_std": 0.003341506700962782, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7400206327438354, "rewards/format_reward_step": 1.0, "step": 142 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8353591244667768, "aux_distill/mean_u": 0.21951536373226854, "aux_distill/n_active_tok": 226.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5444696495619524, "calib/avg_num_step_conf": 7.125, "calib/ece": 0.254765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010325406758448057, "calib/mean_conf": 0.010859375000000001, "calib/mu_c": 0.01161764705882353, "calib/mu_w": 0.010585106382978724, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004424248479614928, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22172015113350127, "calib/step_q_c_n": 397.0, "calib/step_q_gap": 0.0011195905168228937, "calib/step_q_w": 0.22060056061667838, "calib/step_q_w_n": 1427.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1855.0, "completions/max_terminated_length": 1855.0, "completions/mean_length": 303.89453125, "completions/mean_terminated_length": 305.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.15253333333333333, "grad_norm": 0.012597390450537205, "learning_rate": 1.5833333333333333e-06, "loss": 0.0566, "num_tokens": 29773154.0, "reward": 1.0030171871185303, "reward_std": 0.003955106250941753, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7404093742370605, "rewards/format_reward_step": 1.0, "step": 143 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7986927554011345, "aux_distill/mean_u": 0.18862368095835338, "aux_distill/n_active_tok": 228.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5245000000000001, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.30298039215686273, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007357142857142857, "calib/mean_conf": 0.010745098039215688, "calib/mu_c": 0.01125, "calib/mu_w": 0.010514285714285714, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004833880174291448, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21494625719769672, "calib/step_q_c_n": 521.0, "calib/step_q_gap": 0.02100746146598942, "calib/step_q_w": 0.1939387957317073, "calib/step_q_w_n": 1312.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 774.0, "completions/max_terminated_length": 774.0, "completions/mean_length": 297.7578125, "completions/mean_terminated_length": 298.9255065917969, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.1536, "grad_norm": 0.011825519613921642, "learning_rate": 1.5555555555555558e-06, "loss": 0.0774, "num_tokens": 29953508.0, "reward": 0.9995401501655579, "reward_std": 0.01496032252907753, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6904866695404053, "rewards/format_reward_step": 0.99609375, "step": 144 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8074095947667956, "aux_distill/mean_u": 0.22168011550755964, "aux_distill/n_active_tok": 248.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5094478692039668, "calib/avg_num_step_conf": 7.7734375, "calib/ece": 0.346078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003182792816939159, "calib/mean_conf": 0.010784313725490196, "calib/mu_c": 0.01098901098901099, "calib/mu_w": 0.010670731707317074, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004534459457117572, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22369472789115646, "calib/step_q_c_n": 588.0, "calib/step_q_gap": 0.04426798038759011, "calib/step_q_w": 0.17942674750356635, "calib/step_q_w_n": 1402.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2226.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 309.578125, "completions/mean_terminated_length": 309.578125, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.15466666666666667, "grad_norm": 0.012043782509863377, "learning_rate": 1.527777777777778e-06, "loss": 0.116, "num_tokens": 30135464.0, "reward": 0.9999317526817322, "reward_std": 0.015501899644732475, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6483011245727539, "rewards/format_reward_step": 0.99609375, "step": 145 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8472644705325365, "aux_distill/mean_u": 0.2108220952642413, "aux_distill/n_active_tok": 256.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5413558578323826, "calib/avg_num_step_conf": 8.01953125, "calib/ece": 0.15788235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001061869240895132, "calib/mean_conf": 0.010745098039215688, "calib/mu_c": 0.01162790697674419, "calib/mu_w": 0.010566037735849057, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004833880174291448, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.19057553956834533, "calib/step_q_c_n": 278.0, "calib/step_q_gap": -0.02633657310771101, "calib/step_q_w": 0.21691211267605634, "calib/step_q_w_n": 1775.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2752.0, "completions/max_terminated_length": 2752.0, "completions/mean_length": 345.4375, "completions/mean_terminated_length": 345.4375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.15573333333333333, "grad_norm": 0.008280741982161999, "learning_rate": 1.5e-06, "loss": 0.1301, "num_tokens": 30331112.0, "reward": 0.9979776740074158, "reward_std": 0.01372268982231617, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8318929672241211, "rewards/format_reward_step": 0.99609375, "step": 146 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7779547851532698, "aux_distill/mean_u": 0.19806066419509277, "aux_distill/n_active_tok": 259.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5379230905546695, "calib/avg_num_step_conf": 8.15625, "calib/ece": 0.2119607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006911217437533222, "calib/mean_conf": 0.011568627450980393, "calib/mu_c": 0.012105263157894735, "calib/mu_w": 0.011414141414141413, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005229084957108481, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.19256997455470737, "calib/step_q_c_n": 393.0, "calib/step_q_gap": 0.0012975851741763855, "calib/step_q_w": 0.19127238938053098, "calib/step_q_w_n": 1695.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1285.0, "completions/max_terminated_length": 1285.0, "completions/mean_length": 323.95703125, "completions/mean_terminated_length": 325.22747802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.1568, "grad_norm": 0.010307558812201023, "learning_rate": 1.4722222222222225e-06, "loss": 0.0717, "num_tokens": 30517725.0, "reward": 0.9987087249755859, "reward_std": 0.014628560282289982, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7786675691604614, "rewards/format_reward_step": 0.99609375, "step": 147 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8126857522875071, "aux_distill/mean_u": 0.2364917961944136, "aux_distill/n_active_tok": 230.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4886096833555429, "calib/avg_num_step_conf": 7.25390625, "calib/ece": 0.39140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002690526048607141, "calib/mean_conf": 0.0109375, "calib/mu_c": 0.010776699029126214, "calib/mu_w": 0.011045751633986928, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004318835925339142, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18042207792207793, "calib/step_q_c_n": 616.0, "calib/step_q_gap": -0.03018227340749502, "calib/step_q_w": 0.21060435132957295, "calib/step_q_w_n": 1241.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 829.0, "completions/max_terminated_length": 829.0, "completions/mean_length": 292.89453125, "completions/mean_terminated_length": 294.04315185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.15786666666666666, "grad_norm": 0.011318879202008247, "learning_rate": 1.4444444444444445e-06, "loss": 0.0996, "num_tokens": 30697818.0, "reward": 1.0042667388916016, "reward_std": 0.003717840416356921, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6061898469924927, "rewards/format_reward_step": 1.0, "step": 148 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8063416481018066, "aux_distill/mean_u": 0.19125913072319098, "aux_distill/n_active_tok": 256.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6097099621689785, "calib/avg_num_step_conf": 8.14453125, "calib/ece": 0.2275, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0026330390920554884, "calib/mean_conf": 0.01078125, "calib/mu_c": 0.01278688524590164, "calib/mu_w": 0.010153846153846152, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004525858861862574, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3081656804733728, "calib/step_q_c_n": 338.0, "calib/step_q_gap": 0.07366081498968649, "calib/step_q_w": 0.2345048654836863, "calib/step_q_w_n": 1747.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1537.0, "completions/max_terminated_length": 1537.0, "completions/mean_length": 351.89453125, "completions/mean_terminated_length": 353.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.15893333333333334, "grad_norm": 0.009276711381971836, "learning_rate": 1.4166666666666667e-06, "loss": 0.1032, "num_tokens": 30892359.0, "reward": 1.0029785633087158, "reward_std": 0.0035227080807089806, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7676757574081421, "rewards/format_reward_step": 1.0, "step": 149 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8243851810693741, "aux_distill/mean_u": 0.22265294731953492, "aux_distill/n_active_tok": 228.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5312522045855379, "calib/avg_num_step_conf": 7.1640625, "calib/ece": 0.305234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008141093474426809, "calib/mean_conf": 0.011171875, "calib/mu_c": 0.011728395061728394, "calib/mu_w": 0.010914285714285713, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00501888523323407, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21167130977130977, "calib/step_q_c_n": 481.0, "calib/step_q_gap": 0.03153073327463568, "calib/step_q_w": 0.1801405764966741, "calib/step_q_w_n": 1353.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 944.0, "completions/max_terminated_length": 944.0, "completions/mean_length": 288.16015625, "completions/mean_terminated_length": 290.42913818359375, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.16, "grad_norm": 0.009506311267614365, "learning_rate": 1.3888888888888892e-06, "loss": 0.0897, "num_tokens": 31071088.0, "reward": 1.0036358833312988, "reward_std": 0.0035198626574128866, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6908656358718872, "rewards/format_reward_step": 1.0, "step": 150 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8354400210082531, "aux_distill/mean_u": 0.21287575401324318, "aux_distill/n_active_tok": 267.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4718146718146718, "calib/avg_num_step_conf": 8.55859375, "calib/ece": 0.26301960784313727, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004787644787644773, "calib/mean_conf": 0.011490196078431372, "calib/mu_c": 0.011142857142857144, "calib/mu_w": 0.011621621621621621, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004944287847050819, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2201898734177215, "calib/step_q_c_n": 474.0, "calib/step_q_gap": 0.003247532124768665, "calib/step_q_w": 0.21694234129295284, "calib/step_q_w_n": 1717.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2159.0, "completions/max_terminated_length": 2159.0, "completions/mean_length": 345.140625, "completions/mean_terminated_length": 346.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.16106666666666666, "grad_norm": 0.009733528830111027, "learning_rate": 1.3611111111111112e-06, "loss": 0.1118, "num_tokens": 31266468.0, "reward": 0.9990626573562622, "reward_std": 0.014477596618235111, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7285941243171692, "rewards/format_reward_step": 0.99609375, "step": 151 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8009604308754206, "aux_distill/mean_u": 0.1808466434731913, "aux_distill/n_active_tok": 222.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.44384057971014496, "calib/avg_num_step_conf": 7.01953125, "calib/ece": 0.26953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012318840579710125, "calib/mean_conf": 0.01171875, "calib/mu_c": 0.010833333333333334, "calib/mu_w": 0.012065217391304346, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.004611360801054283, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.27999999999999997, "calib/step_q_c_n": 480.0, "calib/step_q_gap": 0.06599848139711459, "calib/step_q_w": 0.21400151860288538, "calib/step_q_w_n": 1317.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1475.0, "completions/max_terminated_length": 1475.0, "completions/mean_length": 315.1171875, "completions/mean_terminated_length": 316.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.16213333333333332, "grad_norm": 0.011670260690152645, "learning_rate": 1.3333333333333334e-06, "loss": 0.0747, "num_tokens": 31452530.0, "reward": 0.9990614652633667, "reward_std": 0.014696644619107246, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.720779299736023, "rewards/format_reward_step": 0.99609375, "step": 152 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8575300592929125, "aux_distill/mean_u": 0.25102041297178856, "aux_distill/n_active_tok": 272.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5212359550561798, "calib/avg_num_step_conf": 8.8203125, "calib/ece": 0.28529644268774706, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000455430711610489, "calib/mean_conf": 0.011146245059288538, "calib/mu_c": 0.011466666666666668, "calib/mu_w": 0.01101123595505618, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004519430572410558, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.1944256120527307, "calib/step_q_c_n": 531.0, "calib/step_q_gap": -0.009506640408184164, "calib/step_q_w": 0.20393225246091487, "calib/step_q_w_n": 1727.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1845.0, "completions/max_terminated_length": 1845.0, "completions/mean_length": 385.03515625, "completions/mean_terminated_length": 388.0669250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.1632, "grad_norm": 0.009350521489977837, "learning_rate": 1.3055555555555556e-06, "loss": 0.1013, "num_tokens": 31658419.0, "reward": 0.9915691614151001, "reward_std": 0.037060413509607315, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7018882632255554, "rewards/format_reward_step": 0.98828125, "step": 153 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7743056090548635, "aux_distill/mean_u": 0.18933966077084272, "aux_distill/n_active_tok": 226.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5237829932446549, "calib/avg_num_step_conf": 7.09375, "calib/ece": 0.3130078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007040880284142348, "calib/mean_conf": 0.0112109375, "calib/mu_c": 0.011686746987951807, "calib/mu_w": 0.010982658959537572, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00464548225387782, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17774011299435027, "calib/step_q_c_n": 531.0, "calib/step_q_gap": -0.05550665743366526, "calib/step_q_w": 0.23324677042801553, "calib/step_q_w_n": 1285.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 698.0, "completions/max_terminated_length": 698.0, "completions/mean_length": 291.1015625, "completions/mean_terminated_length": 292.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.16426666666666667, "grad_norm": 0.011979682371020317, "learning_rate": 1.2777777777777779e-06, "loss": 0.1075, "num_tokens": 31837381.0, "reward": 1.0037152767181396, "reward_std": 0.004051705356687307, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.683212161064148, "rewards/format_reward_step": 1.0, "step": 154 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7905787313356996, "aux_distill/mean_u": 0.18707750770316992, "aux_distill/n_active_tok": 227.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4896075823079481, "calib/avg_num_step_conf": 7.1484375, "calib/ece": 0.23109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0001662786830728298, "calib/mean_conf": 0.011093750000000001, "calib/mu_c": 0.010967741935483872, "calib/mu_w": 0.011134020618556702, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004798172666495027, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2575570776255708, "calib/step_q_c_n": 438.0, "calib/step_q_gap": 0.05267791095890412, "calib/step_q_w": 0.20487916666666667, "calib/step_q_w_n": 1392.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 763.0, "completions/max_terminated_length": 763.0, "completions/mean_length": 297.05078125, "completions/mean_terminated_length": 298.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.16533333333333333, "grad_norm": 0.01167488656938076, "learning_rate": 1.25e-06, "loss": 0.1079, "num_tokens": 32020642.0, "reward": 1.0025831460952759, "reward_std": 0.003791379276663065, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7629789113998413, "rewards/format_reward_step": 1.0, "step": 155 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7945300079882145, "aux_distill/mean_u": 0.17373346807153683, "aux_distill/n_active_tok": 215.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5321022284556646, "calib/avg_num_step_conf": 6.7265625, "calib/ece": 0.25862745098039214, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000689574567554932, "calib/mean_conf": 0.011960784313725489, "calib/mu_c": 0.012463768115942029, "calib/mu_w": 0.011774193548387097, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005095021731423283, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18386792452830192, "calib/step_q_c_n": 424.0, "calib/step_q_gap": -0.04083931738232985, "calib/step_q_w": 0.22470724191063177, "calib/step_q_w_n": 1298.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2113.0, "completions/max_terminated_length": 2113.0, "completions/mean_length": 302.0390625, "completions/mean_terminated_length": 302.0390625, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.1664, "grad_norm": 0.011816912330687046, "learning_rate": 1.2222222222222223e-06, "loss": 0.1044, "num_tokens": 32202724.0, "reward": 0.9954628944396973, "reward_std": 0.0261252298951149, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7292070388793945, "rewards/format_reward_step": 0.9921875, "step": 156 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8267823327332735, "aux_distill/mean_u": 0.22326160780332782, "aux_distill/n_active_tok": 226.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5319767441860466, "calib/avg_num_step_conf": 7.1640625, "calib/ece": 0.32421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007387140902872773, "calib/mean_conf": 0.01171875, "calib/mu_c": 0.012209302325581395, "calib/mu_w": 0.011470588235294118, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004938587696649721, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23325515021459226, "calib/step_q_c_n": 466.0, "calib/step_q_gap": 0.0031820507993875757, "calib/step_q_w": 0.23007309941520468, "calib/step_q_w_n": 1368.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 802.0, "completions/max_terminated_length": 802.0, "completions/mean_length": 295.01171875, "completions/mean_terminated_length": 296.16864013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.16746666666666668, "grad_norm": 0.011997681111097336, "learning_rate": 1.1944444444444446e-06, "loss": 0.085, "num_tokens": 32381975.0, "reward": 1.0040206909179688, "reward_std": 0.0045976778492331505, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.6721038818359375, "rewards/format_reward_step": 1.0, "step": 157 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8071128753945231, "aux_distill/mean_u": 0.1967308060436971, "aux_distill/n_active_tok": 216.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5416777807415311, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.3492549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010082688716991223, "calib/mean_conf": 0.011529411764705882, "calib/mu_c": 0.012173913043478262, "calib/mu_w": 0.01116564417177914, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005127216655688678, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22468283582089552, "calib/step_q_c_n": 536.0, "calib/step_q_gap": 0.014818919089977983, "calib/step_q_w": 0.20986391673091753, "calib/step_q_w_n": 1297.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 303.7578125, "completions/mean_terminated_length": 304.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.16853333333333334, "grad_norm": 0.012824364937841892, "learning_rate": 1.1666666666666668e-06, "loss": 0.1108, "num_tokens": 32564977.0, "reward": 1.0003893375396729, "reward_std": 0.01565980166196823, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6453101634979248, "rewards/format_reward_step": 0.99609375, "step": 158 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8039664840325713, "aux_distill/mean_u": 0.19331534523575358, "aux_distill/n_active_tok": 210.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5180052120350629, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.2500390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003529969201610979, "calib/mean_conf": 0.0116796875, "calib/mu_c": 0.011940298507462687, "calib/mu_w": 0.011587301587301589, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004912410294584905, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20959330143540672, "calib/step_q_c_n": 418.0, "calib/step_q_gap": -0.0004089820291602042, "calib/step_q_w": 0.21000228346456692, "calib/step_q_w_n": 1270.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 711.0, "completions/max_terminated_length": 711.0, "completions/mean_length": 276.69140625, "completions/mean_terminated_length": 277.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.1696, "grad_norm": 0.011785455979406834, "learning_rate": 1.138888888888889e-06, "loss": 0.0765, "num_tokens": 32740594.0, "reward": 1.003044605255127, "reward_std": 0.0038546710275113583, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.744370698928833, "rewards/format_reward_step": 1.0, "step": 159 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8207447240129113, "aux_distill/mean_u": 0.19969249503579878, "aux_distill/n_active_tok": 222.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5319074333800841, "calib/avg_num_step_conf": 7.26953125, "calib/ece": 0.25894117647058823, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009209911173445537, "calib/mean_conf": 0.011647058823529413, "calib/mu_c": 0.012318840579710146, "calib/mu_w": 0.011397849462365592, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005498033621434317, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20069499999999998, "calib/step_q_c_n": 400.0, "calib/step_q_gap": -0.04821848391512665, "calib/step_q_w": 0.24891348391512663, "calib/step_q_w_n": 1461.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2247.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 292.94921875, "completions/mean_terminated_length": 294.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.17066666666666666, "grad_norm": 0.016354886814951897, "learning_rate": 1.111111111111111e-06, "loss": 0.0906, "num_tokens": 32920429.0, "reward": 0.9993313550949097, "reward_std": 0.015432949177920818, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7330378293991089, "rewards/format_reward_step": 0.99609375, "step": 160 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7666348684579134, "aux_distill/mean_u": 0.19474998295763324, "aux_distill/n_active_tok": 206.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5205498094719652, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.33294117647058824, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003504354926510602, "calib/mean_conf": 0.01215686274509804, "calib/mu_c": 0.012386363636363635, "calib/mu_w": 0.012035928143712574, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005428221638587695, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18919921875, "calib/step_q_c_n": 512.0, "calib/step_q_gap": 0.00034998405612243766, "calib/step_q_w": 0.18884923469387757, "calib/step_q_w_n": 1176.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1132.0, "completions/max_terminated_length": 1132.0, "completions/mean_length": 284.984375, "completions/mean_terminated_length": 286.10198974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.17173333333333332, "grad_norm": 0.014039784669876099, "learning_rate": 1.0833333333333335e-06, "loss": 0.0816, "num_tokens": 33097305.0, "reward": 1.0002632141113281, "reward_std": 0.016159221529960632, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6606827974319458, "rewards/format_reward_step": 0.99609375, "step": 161 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7794603575021029, "aux_distill/mean_u": 0.17591780110662303, "aux_distill/n_active_tok": 187.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5845205057045946, "calib/avg_num_step_conf": 5.890625, "calib/ece": 0.437109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018538390379278461, "calib/mean_conf": 0.012109375000000002, "calib/mu_c": 0.013130434782608696, "calib/mu_w": 0.01127659574468085, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005748959654526634, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25271074380165287, "calib/step_q_c_n": 605.0, "calib/step_q_gap": 0.03650653560674702, "calib/step_q_w": 0.21620420819490585, "calib/step_q_w_n": 903.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 849.0, "completions/max_terminated_length": 849.0, "completions/mean_length": 254.84375, "completions/mean_terminated_length": 255.84315490722656, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1728, "grad_norm": 0.022141166031360626, "learning_rate": 1.0555555555555557e-06, "loss": 0.1011, "num_tokens": 33266689.0, "reward": 1.0058085918426514, "reward_std": 0.00576754380017519, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5623984336853027, "rewards/format_reward_step": 1.0, "step": 162 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8130544321611524, "aux_distill/mean_u": 0.23703817534060598, "aux_distill/n_active_tok": 214.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.505476330491032, "calib/avg_num_step_conf": 6.6953125, "calib/ece": 0.28615686274509805, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005049985298441629, "calib/mean_conf": 0.011882352941176472, "calib/mu_c": 0.012236842105263157, "calib/mu_w": 0.011731843575418994, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005772303916365068, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.19712616822429907, "calib/step_q_c_n": 428.0, "calib/step_q_gap": -0.031108668478655815, "calib/step_q_w": 0.22823483670295489, "calib/step_q_w_n": 1286.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2387.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 298.65625, "completions/mean_terminated_length": 298.65625, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.17386666666666667, "grad_norm": 0.011381106451153755, "learning_rate": 1.0277777777777777e-06, "loss": 0.1025, "num_tokens": 33447977.0, "reward": 0.9996396899223328, "reward_std": 0.014509235508739948, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.706310510635376, "rewards/format_reward_step": 0.99609375, "step": 163 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8056136062368751, "aux_distill/mean_u": 0.17818100797914577, "aux_distill/n_active_tok": 220.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5464912280701754, "calib/avg_num_step_conf": 7.03515625, "calib/ece": 0.2853125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001146198830409357, "calib/mean_conf": 0.0115625, "calib/mu_c": 0.012368421052631579, "calib/mu_w": 0.011222222222222222, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.004749588797990832, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.14924210526315787, "calib/step_q_c_n": 475.0, "calib/step_q_gap": -0.04428421449551484, "calib/step_q_w": 0.1935263197586727, "calib/step_q_w_n": 1326.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 951.0, "completions/max_terminated_length": 951.0, "completions/mean_length": 299.203125, "completions/mean_terminated_length": 300.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.17493333333333333, "grad_norm": 0.012084920890629292, "learning_rate": 1.0000000000000002e-06, "loss": 0.0816, "num_tokens": 33630709.0, "reward": 0.999687671661377, "reward_std": 0.014886870048940182, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7064065933227539, "rewards/format_reward_step": 0.99609375, "step": 164 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8530535958707333, "aux_distill/mean_u": 0.21941893875711735, "aux_distill/n_active_tok": 209.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5641502746765905, "calib/avg_num_step_conf": 6.94140625, "calib/ece": 0.21129411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016400850611376933, "calib/mean_conf": 0.01223529411764706, "calib/mu_c": 0.013508771929824562, "calib/mu_w": 0.011868686868686869, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005679644545955035, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20603399433427763, "calib/step_q_c_n": 353.0, "calib/step_q_gap": -0.004884334317407785, "calib/step_q_w": 0.21091832865168542, "calib/step_q_w_n": 1424.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1273.0, "completions/max_terminated_length": 1273.0, "completions/mean_length": 300.19140625, "completions/mean_terminated_length": 301.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.176, "grad_norm": 0.012888389639556408, "learning_rate": 9.722222222222224e-07, "loss": 0.0669, "num_tokens": 33813134.0, "reward": 0.9990109205245972, "reward_std": 0.015108481980860233, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7792718410491943, "rewards/format_reward_step": 0.99609375, "step": 165 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8504758551716805, "aux_distill/mean_u": 0.2541528017707982, "aux_distill/n_active_tok": 225.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4875205606808267, "calib/avg_num_step_conf": 7.09765625, "calib/ece": 0.2967578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.22548809268401e-05, "calib/mean_conf": 0.011835937500000001, "calib/mu_c": 0.011772151898734177, "calib/mu_w": 0.011864406779661017, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005461909784690127, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.1830891304347826, "calib/step_q_c_n": 506.0, "calib/step_q_gap": -0.012006216628527883, "calib/step_q_w": 0.19509534706331047, "calib/step_q_w_n": 1311.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1163.0, "completions/max_terminated_length": 1163.0, "completions/mean_length": 320.6953125, "completions/mean_terminated_length": 321.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.17706666666666668, "grad_norm": 0.01425196509808302, "learning_rate": 9.444444444444445e-07, "loss": 0.096, "num_tokens": 34001416.0, "reward": 1.0035477876663208, "reward_std": 0.004095728974789381, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.6985019445419312, "rewards/format_reward_step": 1.0, "step": 166 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8620406305417418, "aux_distill/mean_u": 0.22939131249790382, "aux_distill/n_active_tok": 208.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5423875432525952, "calib/avg_num_step_conf": 6.5, "calib/ece": 0.3211764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010000000000000026, "calib/mean_conf": 0.012156862745098041, "calib/mu_c": 0.012823529411764708, "calib/mu_w": 0.011823529411764705, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006043543140165663, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.22652014652014651, "calib/step_q_c_n": 546.0, "calib/step_q_gap": 0.0010997529602180534, "calib/step_q_w": 0.22542039355992846, "calib/step_q_w_n": 1118.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2255.0, "completions/max_terminated_length": 2255.0, "completions/mean_length": 297.59375, "completions/mean_terminated_length": 297.59375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.17813333333333334, "grad_norm": 0.012138527818024158, "learning_rate": 9.166666666666666e-07, "loss": 0.1287, "num_tokens": 34183208.0, "reward": 1.0002597570419312, "reward_std": 0.01507105678319931, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.6723945140838623, "rewards/format_reward_step": 0.99609375, "step": 167 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8371885055676103, "aux_distill/mean_u": 0.25030672633272105, "aux_distill/n_active_tok": 206.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5726744186046512, "calib/avg_num_step_conf": 6.49609375, "calib/ece": 0.31515625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001960132890365448, "calib/mean_conf": 0.012968750000000001, "calib/mu_c": 0.014285714285714285, "calib/mu_w": 0.012325581395348837, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00653827373528365, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2508533472803347, "calib/step_q_c_n": 478.0, "calib/step_q_gap": 0.031394022385819925, "calib/step_q_w": 0.21945932489451478, "calib/step_q_w_n": 1185.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 740.0, "completions/max_terminated_length": 740.0, "completions/mean_length": 290.72265625, "completions/mean_terminated_length": 291.8627624511719, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.1792, "grad_norm": 0.015005046501755714, "learning_rate": 8.88888888888889e-07, "loss": 0.0781, "num_tokens": 34362305.0, "reward": 1.0045819282531738, "reward_std": 0.005785136483609676, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6810390949249268, "rewards/format_reward_step": 1.0, "step": 168 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7752046966925263, "aux_distill/mean_u": 0.17514847724133747, "aux_distill/n_active_tok": 194.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5888518518518518, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.28125490196078434, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003122222222222224, "calib/mean_conf": 0.012862745098039217, "calib/mu_c": 0.015066666666666667, "calib/mu_w": 0.011944444444444443, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006747605553063314, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23940054495912808, "calib/step_q_c_n": 367.0, "calib/step_q_gap": 0.023670679187316024, "calib/step_q_w": 0.21572986577181205, "calib/step_q_w_n": 1192.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 949.0, "completions/max_terminated_length": 949.0, "completions/mean_length": 279.140625, "completions/mean_terminated_length": 280.2353210449219, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.18026666666666666, "grad_norm": 0.01291783805936575, "learning_rate": 8.611111111111112e-07, "loss": 0.0862, "num_tokens": 34537949.0, "reward": 1.0004026889801025, "reward_std": 0.01507566962391138, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7117429971694946, "rewards/format_reward_step": 0.99609375, "step": 169 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7850721022114158, "aux_distill/mean_u": 0.18490419322101734, "aux_distill/n_active_tok": 212.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5295285186990738, "calib/avg_num_step_conf": 6.65625, "calib/ece": 0.311328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00017967825057455325, "calib/mean_conf": 0.012890625000000001, "calib/mu_c": 0.013012048192771086, "calib/mu_w": 0.012832369942196532, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006392420285727073, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2407480496453901, "calib/step_q_c_n": 564.0, "calib/step_q_gap": 0.016790681224337467, "calib/step_q_w": 0.22395736842105263, "calib/step_q_w_n": 1140.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 813.0, "completions/max_terminated_length": 813.0, "completions/mean_length": 290.69140625, "completions/mean_terminated_length": 291.8313903808594, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.18133333333333335, "grad_norm": 0.017970673739910126, "learning_rate": 8.333333333333333e-07, "loss": 0.087, "num_tokens": 34716518.0, "reward": 1.0041152238845825, "reward_std": 0.004571193363517523, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.684011697769165, "rewards/format_reward_step": 1.0, "step": 170 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7836430184543133, "aux_distill/mean_u": 0.19953727431926524, "aux_distill/n_active_tok": 214.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.593505386344261, "calib/avg_num_step_conf": 6.796875, "calib/ece": 0.2562890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0032992327365728887, "calib/mean_conf": 0.0132421875, "calib/mu_c": 0.01565217391304348, "calib/mu_w": 0.01235294117647059, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007291096982954193, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23558839779005522, "calib/step_q_c_n": 362.0, "calib/step_q_gap": 0.031814522608632906, "calib/step_q_w": 0.20377387518142231, "calib/step_q_w_n": 1378.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1211.0, "completions/max_terminated_length": 1211.0, "completions/mean_length": 295.1875, "completions/mean_terminated_length": 296.3451232910156, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.1824, "grad_norm": 0.012783910147845745, "learning_rate": 8.055555555555557e-07, "loss": 0.0813, "num_tokens": 34898982.0, "reward": 1.004104495048523, "reward_std": 0.005231703631579876, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7386777400970459, "rewards/format_reward_step": 1.0, "step": 171 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7730856984853745, "aux_distill/mean_u": 0.16074557660169184, "aux_distill/n_active_tok": 193.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5363810277722805, "calib/avg_num_step_conf": 6.140625, "calib/ece": 0.3488671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016801899861468433, "calib/mean_conf": 0.014414062500000002, "calib/mu_c": 0.015483870967741935, "calib/mu_w": 0.013803680981595092, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008270832923357462, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24181102362204723, "calib/step_q_c_n": 508.0, "calib/step_q_gap": 0.008519670238588573, "calib/step_q_w": 0.23329135338345866, "calib/step_q_w_n": 1064.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 950.0, "completions/max_terminated_length": 950.0, "completions/mean_length": 263.9140625, "completions/mean_terminated_length": 264.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.18346666666666667, "grad_norm": 0.01724671758711338, "learning_rate": 7.777777777777779e-07, "loss": 0.097, "num_tokens": 35069896.0, "reward": 1.0054869651794434, "reward_std": 0.0069214459508657455, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6476925611495972, "rewards/format_reward_step": 1.0, "step": 172 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7590753780677915, "aux_distill/mean_u": 0.18627376776313878, "aux_distill/n_active_tok": 209.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.540965250965251, "calib/avg_num_step_conf": 6.60546875, "calib/ece": 0.2604705882352941, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0019150579150579161, "calib/mean_conf": 0.01403921568627451, "calib/mu_c": 0.01542857142857143, "calib/mu_w": 0.013513513513513514, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006899732260544964, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23605496828752645, "calib/step_q_c_n": 473.0, "calib/step_q_gap": 0.017361700635638105, "calib/step_q_w": 0.21869326765188835, "calib/step_q_w_n": 1218.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 695.0, "completions/max_terminated_length": 695.0, "completions/mean_length": 288.703125, "completions/mean_terminated_length": 289.8352966308594, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.18453333333333333, "grad_norm": 0.014482683502137661, "learning_rate": 7.5e-07, "loss": 0.0799, "num_tokens": 35246964.0, "reward": 1.0001906156539917, "reward_std": 0.016043417155742645, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7308500409126282, "rewards/format_reward_step": 0.99609375, "step": 173 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7893230896443129, "aux_distill/mean_u": 0.20265565190850546, "aux_distill/n_active_tok": 205.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4901930501930501, "calib/avg_num_step_conf": 6.77734375, "calib/ece": 0.26050980392156864, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003938223938223951, "calib/mean_conf": 0.014, "calib/mu_c": 0.014285714285714285, "calib/mu_w": 0.01389189189189189, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007545937746270388, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.14781926229508197, "calib/step_q_c_n": 488.0, "calib/step_q_gap": -0.08902917395191084, "calib/step_q_w": 0.2368484362469928, "calib/step_q_w_n": 1247.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1479.0, "completions/max_terminated_length": 1479.0, "completions/mean_length": 294.26953125, "completions/mean_terminated_length": 295.4235534667969, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.1856, "grad_norm": 0.013904116116464138, "learning_rate": 7.222222222222222e-07, "loss": 0.0318, "num_tokens": 35426529.0, "reward": 0.9998739361763, "reward_std": 0.01602732203900814, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7302167415618896, "rewards/format_reward_step": 0.99609375, "step": 174 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.81294996291399, "aux_distill/mean_u": 0.19859697078451224, "aux_distill/n_active_tok": 218.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5849780701754386, "calib/avg_num_step_conf": 7.140625, "calib/ece": 0.28203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00321637426900585, "calib/mean_conf": 0.01484375, "calib/mu_c": 0.017105263157894738, "calib/mu_w": 0.013888888888888888, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008522431339559152, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18974770642201833, "calib/step_q_c_n": 436.0, "calib/step_q_gap": -0.014610195876832266, "calib/step_q_w": 0.2043579022988506, "calib/step_q_w_n": 1392.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 957.0, "completions/max_terminated_length": 957.0, "completions/mean_length": 302.40625, "completions/mean_terminated_length": 303.5921630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.18666666666666668, "grad_norm": 0.01465411577373743, "learning_rate": 6.944444444444446e-07, "loss": 0.0535, "num_tokens": 35609769.0, "reward": 1.0049315690994263, "reward_std": 0.005668940953910351, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7129883170127869, "rewards/format_reward_step": 1.0, "step": 175 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8950878940522671, "aux_distill/mean_u": 0.26817866945524593, "aux_distill/n_active_tok": 227.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6145161290322582, "calib/avg_num_step_conf": 7.13671875, "calib/ece": 0.259140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0024485407066052265, "calib/mean_conf": 0.015078125000000001, "calib/mu_c": 0.01685714285714286, "calib/mu_w": 0.014408602150537632, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.000390625, "calib/std_conf": 0.009099802002481977, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.18709090909090909, "calib/step_q_c_n": 385.0, "calib/step_q_gap": 0.020935361240701056, "calib/step_q_w": 0.16615554785020803, "calib/step_q_w_n": 1442.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 310.3046875, "completions/mean_terminated_length": 311.5215759277344, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.18773333333333334, "grad_norm": 0.013478383421897888, "learning_rate": 6.666666666666667e-07, "loss": 0.0958, "num_tokens": 35793271.0, "reward": 0.9966421723365784, "reward_std": 0.027028311043977737, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7276594042778015, "rewards/format_reward_step": 0.9921875, "step": 176 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7781459763646126, "aux_distill/mean_u": 0.2034369505524209, "aux_distill/n_active_tok": 222.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5171164772727271, "calib/avg_num_step_conf": 7.0234375, "calib/ece": 0.29917968749999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003443181818181821, "calib/mean_conf": 0.0188671875, "calib/mu_c": 0.016499999999999997, "calib/mu_w": 0.019943181818181818, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0027734375, "calib/std_conf": 0.04424591631823036, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.16645539906103285, "calib/step_q_c_n": 426.0, "calib/step_q_gap": -0.020672880822349082, "calib/step_q_w": 0.18712827988338193, "calib/step_q_w_n": 1372.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1444.0, "completions/max_terminated_length": 1444.0, "completions/mean_length": 301.12109375, "completions/mean_terminated_length": 302.3019714355469, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.1888, "grad_norm": 0.013484400697052479, "learning_rate": 6.388888888888889e-07, "loss": 0.068, "num_tokens": 35974190.0, "reward": 1.0039993524551392, "reward_std": 0.007987409830093384, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6954988241195679, "rewards/format_reward_step": 1.0, "step": 177 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8372670179232955, "aux_distill/mean_u": 0.22859842521539914, "aux_distill/n_active_tok": 221.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5706492637215529, "calib/avg_num_step_conf": 6.98046875, "calib/ece": 0.33625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003116465863453816, "calib/mean_conf": 0.015312500000000001, "calib/mu_c": 0.017333333333333333, "calib/mu_w": 0.014216867469879517, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008833308765689106, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21936416184971097, "calib/step_q_c_n": 519.0, "calib/step_q_gap": -0.012766910705494072, "calib/step_q_w": 0.23213107255520504, "calib/step_q_w_n": 1268.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1080.0, "completions/max_terminated_length": 1080.0, "completions/mean_length": 300.6171875, "completions/mean_terminated_length": 301.79608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.18986666666666666, "grad_norm": 0.014162966050207615, "learning_rate": 6.111111111111112e-07, "loss": 0.065, "num_tokens": 36157220.0, "reward": 1.0059374570846558, "reward_std": 0.007233859039843082, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6603125333786011, "rewards/format_reward_step": 1.0, "step": 178 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8423464596271515, "aux_distill/mean_u": 0.21650959811507547, "aux_distill/n_active_tok": 205.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5967660616537022, "calib/avg_num_step_conf": 6.47265625, "calib/ece": 0.289609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002469029098242587, "calib/mean_conf": 0.015078125000000001, "calib/mu_c": 0.0167948717948718, "calib/mu_w": 0.014325842696629213, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008052182715535892, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17574938574938576, "calib/step_q_c_n": 407.0, "calib/step_q_gap": -0.06633861425061424, "calib/step_q_w": 0.242088, "calib/step_q_w_n": 1250.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1040.0, "completions/max_terminated_length": 1040.0, "completions/mean_length": 285.65234375, "completions/mean_terminated_length": 286.7725524902344, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.19093333333333334, "grad_norm": 0.014211541041731834, "learning_rate": 5.833333333333334e-07, "loss": 0.0594, "num_tokens": 36336611.0, "reward": 1.0049710273742676, "reward_std": 0.004556154832243919, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7052546739578247, "rewards/format_reward_step": 1.0, "step": 179 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.801798083819449, "aux_distill/mean_u": 0.22506361019896942, "aux_distill/n_active_tok": 239.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5592140921409214, "calib/avg_num_step_conf": 7.6953125, "calib/ece": 0.3385433070866142, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018780487804878065, "calib/mean_conf": 0.01578740157480315, "calib/mu_c": 0.017, "calib/mu_w": 0.015121951219512195, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00878270483802163, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21153817504655495, "calib/step_q_c_n": 537.0, "calib/step_q_gap": 0.00631151768437771, "calib/step_q_w": 0.20522665736217724, "calib/step_q_w_n": 1433.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2826.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 345.01953125, "completions/mean_terminated_length": 346.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.192, "grad_norm": 0.015591487288475037, "learning_rate": 5.555555555555555e-07, "loss": 0.0698, "num_tokens": 36528792.0, "reward": 0.9980021119117737, "reward_std": 0.028464868664741516, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6522542834281921, "rewards/format_reward_step": 0.9921875, "step": 180 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7981845401227474, "aux_distill/mean_u": 0.1806097187679103, "aux_distill/n_active_tok": 202.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5448807854137446, "calib/avg_num_step_conf": 6.3125, "calib/ece": 0.25356862745098036, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001105656848994857, "calib/mean_conf": 0.017019607843137254, "calib/mu_c": 0.01782608695652174, "calib/mu_w": 0.016720430107526885, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011804768388619833, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21485241730279897, "calib/step_q_c_n": 393.0, "calib/step_q_gap": -0.025214385640782377, "calib/step_q_w": 0.24006680294358135, "calib/step_q_w_n": 1223.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1254.0, "completions/max_terminated_length": 1254.0, "completions/mean_length": 264.6796875, "completions/mean_terminated_length": 265.7176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.19306666666666666, "grad_norm": 0.01583191752433777, "learning_rate": 5.277777777777779e-07, "loss": 0.0544, "num_tokens": 36702814.0, "reward": 1.0006847381591797, "reward_std": 0.01699506677687168, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7357445359230042, "rewards/format_reward_step": 0.99609375, "step": 181 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7627274738624692, "aux_distill/mean_u": 0.1822232042934428, "aux_distill/n_active_tok": 227.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5438858893915074, "calib/avg_num_step_conf": 7.1171875, "calib/ece": 0.28545098039215683, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001837151612432513, "calib/mean_conf": 0.016509803921568627, "calib/mu_c": 0.017792207792207793, "calib/mu_w": 0.01595505617977528, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00965964426946115, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22081318681318682, "calib/step_q_c_n": 455.0, "calib/step_q_gap": -0.004984106529900223, "calib/step_q_w": 0.22579729334308704, "calib/step_q_w_n": 1367.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3029.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 321.87109375, "completions/mean_terminated_length": 321.87109375, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.19413333333333332, "grad_norm": 0.012693443335592747, "learning_rate": 5.000000000000001e-07, "loss": 0.1194, "num_tokens": 36891373.0, "reward": 0.9973569512367249, "reward_std": 0.02882469817996025, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.701745331287384, "rewards/format_reward_step": 0.9921875, "step": 182 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7574250753968954, "aux_distill/mean_u": 0.1943297293494716, "aux_distill/n_active_tok": 228.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4795465249276728, "calib/avg_num_step_conf": 7.21484375, "calib/ece": 0.3306640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009008948395344135, "calib/mean_conf": 0.016992187500000002, "calib/mu_c": 0.01640449438202247, "calib/mu_w": 0.017305389221556885, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009799550447078873, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18748726655348047, "calib/step_q_c_n": 589.0, "calib/step_q_gap": -0.06748069847036686, "calib/step_q_w": 0.25496796502384733, "calib/step_q_w_n": 1258.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 924.0, "completions/max_terminated_length": 924.0, "completions/mean_length": 306.62890625, "completions/mean_terminated_length": 307.8313903808594, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.1952, "grad_norm": 0.015803849324584007, "learning_rate": 4.7222222222222226e-07, "loss": 0.0647, "num_tokens": 37076550.0, "reward": 1.005510687828064, "reward_std": 0.006886619608849287, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6633652448654175, "rewards/format_reward_step": 1.0, "step": 183 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8180130543187261, "aux_distill/mean_u": 0.21504098794632884, "aux_distill/n_active_tok": 213.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4544132254860223, "calib/avg_num_step_conf": 6.69140625, "calib/ece": 0.2993725490196078, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0023563218390804594, "calib/mean_conf": 0.01827450980392157, "calib/mu_c": 0.016666666666666666, "calib/mu_w": 0.019022988505747126, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01148584539194007, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18374944852941177, "calib/step_q_c_n": 544.0, "calib/step_q_gap": -0.015600423155789245, "calib/step_q_w": 0.19934987168520102, "calib/step_q_w_n": 1169.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 697.0, "completions/max_terminated_length": 697.0, "completions/mean_length": 295.5859375, "completions/mean_terminated_length": 296.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.19626666666666667, "grad_norm": 0.015823913738131523, "learning_rate": 4.444444444444445e-07, "loss": 0.0709, "num_tokens": 37257500.0, "reward": 1.0011351108551025, "reward_std": 0.01635909266769886, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6897703409194946, "rewards/format_reward_step": 0.99609375, "step": 184 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7792685413733125, "aux_distill/mean_u": 0.19762557337851924, "aux_distill/n_active_tok": 250.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5308478038815118, "calib/avg_num_step_conf": 7.85546875, "calib/ece": 0.33318897637795275, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010166836908409964, "calib/mean_conf": 0.01720472440944882, "calib/mu_c": 0.017865168539325845, "calib/mu_w": 0.01684848484848485, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010408081795236494, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20600725952813068, "calib/step_q_c_n": 551.0, "calib/step_q_gap": 0.009205889665116956, "calib/step_q_w": 0.19680136986301372, "calib/step_q_w_n": 1460.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2516.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 326.39453125, "completions/mean_terminated_length": 327.6745300292969, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.19733333333333333, "grad_norm": 0.014444109983742237, "learning_rate": 4.1666666666666667e-07, "loss": 0.103, "num_tokens": 37447977.0, "reward": 0.9942923188209534, "reward_std": 0.039944350719451904, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6526472568511963, "rewards/format_reward_step": 0.98828125, "step": 185 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8481473177671432, "aux_distill/mean_u": 0.2418076824490043, "aux_distill/n_active_tok": 233.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6300784533482768, "calib/avg_num_step_conf": 7.30859375, "calib/ece": 0.30854901960784314, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005070748108713923, "calib/mean_conf": 0.016941176470588237, "calib/mu_c": 0.02036144578313253, "calib/mu_w": 0.015290697674418607, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01029260493087031, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2827586206896551, "calib/step_q_c_n": 464.0, "calib/step_q_gap": 0.10086096610543338, "calib/step_q_w": 0.18189765458422175, "calib/step_q_w_n": 1407.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1993.0, "completions/max_terminated_length": 1993.0, "completions/mean_length": 324.77734375, "completions/mean_terminated_length": 326.0509948730469, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.1984, "grad_norm": 0.014936697669327259, "learning_rate": 3.8888888888888895e-07, "loss": 0.0918, "num_tokens": 37636160.0, "reward": 1.0024995803833008, "reward_std": 0.018394682556390762, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.6846867799758911, "rewards/format_reward_step": 0.99609375, "step": 186 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7872933838516474, "aux_distill/mean_u": 0.18320702169394124, "aux_distill/n_active_tok": 254.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5531104697771364, "calib/avg_num_step_conf": 7.94921875, "calib/ece": 0.24219607843137259, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018927368927368939, "calib/mean_conf": 0.01662745098039216, "calib/mu_c": 0.018030303030303032, "calib/mu_w": 0.016137566137566138, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009761791832292309, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22751269035532995, "calib/step_q_c_n": 394.0, "calib/step_q_gap": 0.0014950182042025861, "calib/step_q_w": 0.22601767215112736, "calib/step_q_w_n": 1641.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 343.58984375, "completions/mean_terminated_length": 343.58984375, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.19946666666666665, "grad_norm": 0.012962001375854015, "learning_rate": 3.611111111111111e-07, "loss": 0.1133, "num_tokens": 37825663.0, "reward": 1.0005570650100708, "reward_std": 0.016831982880830765, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7472078800201416, "rewards/format_reward_step": 0.99609375, "step": 187 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8266328135505319, "aux_distill/mean_u": 0.23075207178941026, "aux_distill/n_active_tok": 220.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5012258335668254, "calib/avg_num_step_conf": 7.19140625, "calib/ece": 0.30701960784313725, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015529560100868596, "calib/mean_conf": 0.01847058823529412, "calib/mu_c": 0.019518072289156627, "calib/mu_w": 0.017965116279069767, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012971088425729619, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.15902970297029703, "calib/step_q_c_n": 505.0, "calib/step_q_gap": -0.0739418539159305, "calib/step_q_w": 0.23297155688622753, "calib/step_q_w_n": 1336.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 315.6171875, "completions/mean_terminated_length": 316.85491943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.20053333333333334, "grad_norm": 0.01490770187228918, "learning_rate": 3.3333333333333335e-07, "loss": 0.0597, "num_tokens": 38010533.0, "reward": 1.0021681785583496, "reward_std": 0.019167276099324226, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.6840238571166992, "rewards/format_reward_step": 0.99609375, "step": 188 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.785682198125869, "aux_distill/mean_u": 0.20517065482001212, "aux_distill/n_active_tok": 218.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5821377840909091, "calib/avg_num_step_conf": 6.87890625, "calib/ece": 0.2951171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034431818181818175, "calib/mean_conf": 0.0173828125, "calib/mu_c": 0.01975, "calib/mu_w": 0.016306818181818183, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011849723819137885, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22650107991360688, "calib/step_q_c_n": 463.0, "calib/step_q_gap": 0.03564668854226635, "calib/step_q_w": 0.19085439137134053, "calib/step_q_w_n": 1298.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 853.0, "completions/max_terminated_length": 853.0, "completions/mean_length": 294.375, "completions/mean_terminated_length": 295.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.2016, "grad_norm": 0.014363201335072517, "learning_rate": 3.055555555555556e-07, "loss": 0.0937, "num_tokens": 38193661.0, "reward": 1.0059505701065063, "reward_std": 0.00714213028550148, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6994011402130127, "rewards/format_reward_step": 1.0, "step": 189 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8067413792014122, "aux_distill/mean_u": 0.1998867498130261, "aux_distill/n_active_tok": 258.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5696138996138996, "calib/avg_num_step_conf": 8.06640625, "calib/ece": 0.25670588235294123, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002830115830115835, "calib/mean_conf": 0.01780392156862745, "calib/mu_c": 0.01985714285714286, "calib/mu_w": 0.017027027027027027, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.010246031480133552, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.21214574898785424, "calib/step_q_c_n": 494.0, "calib/step_q_gap": -0.022446230642954146, "calib/step_q_w": 0.23459197963080838, "calib/step_q_w_n": 1571.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1483.0, "completions/max_terminated_length": 1483.0, "completions/mean_length": 345.1640625, "completions/mean_terminated_length": 346.5176696777344, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.20266666666666666, "grad_norm": 0.012308570556342602, "learning_rate": 2.7777777777777776e-07, "loss": 0.0772, "num_tokens": 38387631.0, "reward": 0.9974077939987183, "reward_std": 0.028176523745059967, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7291905879974365, "rewards/format_reward_step": 0.9921875, "step": 190 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7780831595882773, "aux_distill/mean_u": 0.18509156854576636, "aux_distill/n_active_tok": 242.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5230202578268878, "calib/avg_num_step_conf": 7.640625, "calib/ece": 0.275546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004405156537753245, "calib/mean_conf": 0.017421875, "calib/mu_c": 0.017733333333333334, "calib/mu_w": 0.01729281767955801, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010513837143706146, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23987234042553193, "calib/step_q_c_n": 470.0, "calib/step_q_gap": 0.03117785859511471, "calib/step_q_w": 0.20869448183041722, "calib/step_q_w_n": 1486.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1188.0, "completions/max_terminated_length": 1188.0, "completions/mean_length": 322.1484375, "completions/mean_terminated_length": 323.4117736816406, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.20373333333333332, "grad_norm": 0.014701644890010357, "learning_rate": 2.5000000000000004e-07, "loss": 0.0938, "num_tokens": 38574269.0, "reward": 1.004988193511963, "reward_std": 0.005673405714333057, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7170077562332153, "rewards/format_reward_step": 1.0, "step": 191 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.7313472367823124, "aux_distill/mean_u": 0.17417473060254704, "aux_distill/n_active_tok": 232.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.612087912087912, "calib/avg_num_step_conf": 7.46484375, "calib/ece": 0.336953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006053946053946047, "calib/mean_conf": 0.018515625000000004, "calib/mu_c": 0.02241758241758241, "calib/mu_w": 0.016363636363636365, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01349709712713719, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17961389961389962, "calib/step_q_c_n": 518.0, "calib/step_q_gap": -0.03224044353039329, "calib/step_q_w": 0.2118543431442929, "calib/step_q_w_n": 1393.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1295.0, "completions/max_terminated_length": 1295.0, "completions/mean_length": 321.51171875, "completions/mean_terminated_length": 322.7725830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.2048, "grad_norm": 0.016461456194519997, "learning_rate": 2.2222222222222224e-07, "loss": 0.0852, "num_tokens": 38761552.0, "reward": 1.0077061653137207, "reward_std": 0.008474325761198997, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.659943699836731, "rewards/format_reward_step": 1.0, "step": 192 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8442441988736391, "aux_distill/mean_u": 0.21636011236901698, "aux_distill/n_active_tok": 231.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49377264492753636, "calib/avg_num_step_conf": 7.33203125, "calib/ece": 0.2634375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008212560386473382, "calib/mean_conf": 0.017812500000000002, "calib/mu_c": 0.017222222222222226, "calib/mu_w": 0.018043478260869564, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.01185442296149416, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.24503211991434687, "calib/step_q_c_n": 467.0, "calib/step_q_gap": 0.016645027715765315, "calib/step_q_w": 0.22838709219858155, "calib/step_q_w_n": 1410.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1001.0, "completions/max_terminated_length": 1001.0, "completions/mean_length": 314.51953125, "completions/mean_terminated_length": 315.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.20586666666666667, "grad_norm": 0.018526718020439148, "learning_rate": 1.9444444444444447e-07, "loss": 0.0655, "num_tokens": 38947781.0, "reward": 1.000710368156433, "reward_std": 0.017061732709407806, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7240769863128662, "rewards/format_reward_step": 0.99609375, "step": 193 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8148846421390772, "aux_distill/mean_u": 0.21376291768715036, "aux_distill/n_active_tok": 208.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5002201257861636, "calib/avg_num_step_conf": 6.5390625, "calib/ece": 0.3948046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -2.138364779874155e-05, "calib/mean_conf": 0.019257812500000006, "calib/mu_c": 0.019245283018867926, "calib/mu_w": 0.019266666666666668, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.011483511340824449, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.23845024469820558, "calib/step_q_c_n": 613.0, "calib/step_q_gap": 0.0560185764606938, "calib/step_q_w": 0.18243166823751178, "calib/step_q_w_n": 1061.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 824.0, "completions/max_terminated_length": 824.0, "completions/mean_length": 281.90625, "completions/mean_terminated_length": 283.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.20693333333333333, "grad_norm": 0.018510188907384872, "learning_rate": 1.6666666666666668e-07, "loss": 0.0671, "num_tokens": 39125893.0, "reward": 1.003811240196228, "reward_std": 0.01898595690727234, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5974664092063904, "rewards/format_reward_step": 0.99609375, "step": 194 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8549988893792033, "aux_distill/mean_u": 0.248499681289213, "aux_distill/n_active_tok": 221.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5184979358526516, "calib/avg_num_step_conf": 7.125, "calib/ece": 0.24529411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006232137186408387, "calib/mean_conf": 0.017450980392156864, "calib/mu_c": 0.01791044776119403, "calib/mu_w": 0.017287234042553192, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010854318243400978, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18694690265486727, "calib/step_q_c_n": 452.0, "calib/step_q_gap": -0.028434511339301805, "calib/step_q_w": 0.21538141399416907, "calib/step_q_w_n": 1372.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1769.0, "completions/max_terminated_length": 1769.0, "completions/mean_length": 318.50390625, "completions/mean_terminated_length": 319.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.208, "grad_norm": 0.01629587449133396, "learning_rate": 1.3888888888888888e-07, "loss": 0.0841, "num_tokens": 39313414.0, "reward": 1.0005707740783691, "reward_std": 0.016977444291114807, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7433292865753174, "rewards/format_reward_step": 0.99609375, "step": 195 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.832436092197895, "aux_distill/mean_u": 0.22138925953920585, "aux_distill/n_active_tok": 210.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5842329545454547, "calib/avg_num_step_conf": 6.578125, "calib/ece": 0.2933203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0028295454545454526, "calib/mean_conf": 0.0191796875, "calib/mu_c": 0.021124999999999998, "calib/mu_w": 0.018295454545454545, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011062276547001695, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22217864923747274, "calib/step_q_c_n": 459.0, "calib/step_q_gap": 0.014548690053799274, "calib/step_q_w": 0.20762995918367347, "calib/step_q_w_n": 1225.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 810.0, "completions/max_terminated_length": 810.0, "completions/mean_length": 274.28125, "completions/mean_terminated_length": 275.35687255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.20906666666666668, "grad_norm": 0.015408625826239586, "learning_rate": 1.1111111111111112e-07, "loss": 0.0717, "num_tokens": 39486174.0, "reward": 1.0063564777374268, "reward_std": 0.00686432421207428, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.7002128958702087, "rewards/format_reward_step": 1.0, "step": 196 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8513198988512158, "aux_distill/mean_u": 0.2619026741844905, "aux_distill/n_active_tok": 218.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5108518518518519, "calib/avg_num_step_conf": 6.82421875, "calib/ece": 0.27505882352941174, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003666666666666714, "calib/mean_conf": 0.01905882352941177, "calib/mu_c": 0.018799999999999997, "calib/mu_w": 0.01916666666666667, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012394514543651142, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18097387173396673, "calib/step_q_c_n": 421.0, "calib/step_q_gap": -0.052125675777345504, "calib/step_q_w": 0.23309954751131223, "calib/step_q_w_n": 1326.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2771.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 292.44921875, "completions/mean_terminated_length": 292.44921875, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.21013333333333334, "grad_norm": 0.016117248684167862, "learning_rate": 8.333333333333334e-08, "loss": 0.1364, "num_tokens": 39666097.0, "reward": 1.001344084739685, "reward_std": 0.017887791618704796, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7136257886886597, "rewards/format_reward_step": 0.99609375, "step": 197 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8347745090723038, "aux_distill/mean_u": 0.18832998660862468, "aux_distill/n_active_tok": 217.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5501277139208173, "calib/avg_num_step_conf": 6.80859375, "calib/ece": 0.2989019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016581524052788413, "calib/mean_conf": 0.01874509803921569, "calib/mu_c": 0.019876543209876543, "calib/mu_w": 0.018218390804597702, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01151272635510995, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20193293885601576, "calib/step_q_c_n": 507.0, "calib/step_q_gap": -0.0051042779724632115, "calib/step_q_w": 0.20703721682847898, "calib/step_q_w_n": 1236.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 815.0, "completions/max_terminated_length": 815.0, "completions/mean_length": 284.28125, "completions/mean_terminated_length": 285.3960876464844, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.2112, "grad_norm": 0.014969132840633392, "learning_rate": 5.555555555555556e-08, "loss": 0.0785, "num_tokens": 39844257.0, "reward": 1.0040948390960693, "reward_std": 0.012611201032996178, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.691783607006073, "rewards/format_reward_step": 0.99609375, "step": 198 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8050779681652784, "aux_distill/mean_u": 0.22997851558929855, "aux_distill/n_active_tok": 214.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5454513596860108, "calib/avg_num_step_conf": 6.7578125, "calib/ece": 0.301015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015727502102607198, "calib/mean_conf": 0.019296875, "calib/mu_c": 0.020365853658536586, "calib/mu_w": 0.018793103448275866, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012448870038456301, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22131524008350728, "calib/step_q_c_n": 479.0, "calib/step_q_gap": 0.0123304279332275, "calib/step_q_w": 0.20898481215027978, "calib/step_q_w_n": 1251.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 943.0, "completions/max_terminated_length": 943.0, "completions/mean_length": 299.14453125, "completions/mean_terminated_length": 300.3176574707031, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.21226666666666666, "grad_norm": 0.016602490097284317, "learning_rate": 2.777777777777778e-08, "loss": 0.1056, "num_tokens": 40025038.0, "reward": 1.0062596797943115, "reward_std": 0.008099588565528393, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6922070384025574, "rewards/format_reward_step": 1.0, "step": 199 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8087783623486757, "aux_distill/mean_u": 0.21598880375647106, "aux_distill/n_active_tok": 220.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6062042705689114, "calib/avg_num_step_conf": 6.88671875, "calib/ece": 0.27082352941176474, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006024339256383453, "calib/mean_conf": 0.019372549019607846, "calib/mu_c": 0.02364864864864865, "calib/mu_w": 0.017624309392265197, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013988736062981313, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23423076923076921, "calib/step_q_c_n": 416.0, "calib/step_q_gap": 0.03253158586031635, "calib/step_q_w": 0.20169918337045287, "calib/step_q_w_n": 1347.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1928.0, "completions/max_terminated_length": 1928.0, "completions/mean_length": 316.23828125, "completions/mean_terminated_length": 316.23828125, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.21333333333333335, "grad_norm": 0.013604212552309036, "learning_rate": 0.0, "loss": 0.0891, "num_tokens": 40214043.0, "reward": 1.0026452541351318, "reward_std": 0.01684332825243473, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7201343774795532, "rewards/format_reward_step": 0.99609375, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.09831852782517672, "train_runtime": 18030.7973, "train_samples_per_second": 2.84, "train_steps_per_second": 0.011 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 40214043, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }