{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.2309403103940628, "aux_distill/mean_u": 0.40830236726303903, "aux_distill/n_active_tok": 26.11764705882353, "calib/answer_extract_rate": 0.140625, "calib/auroc": 0.2954545454545454, "calib/avg_num_step_conf": 0.43359375, "calib/ece": 0.5410526315789472, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.7368421052631579, "calib/gap": -0.04511363636363652, "calib/mean_conf": 0.9073684210526317, "calib/mu_c": 0.88125, "calib/mu_w": 0.9263636363636365, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.1484375, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.5136842105263156, "calib/std_conf": 0.140069234957993, "calib/step_conf_rate": 0.08984375, "calib/step_q_c": 0.855, "calib/step_q_c_n": 30.0, "calib/step_q_gap": 0.07586419753086415, "calib/step_q_w": 0.7791358024691358, "calib/step_q_w_n": 81.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 639.21875, "completions/mean_terminated_length": 714.5851440429688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.013296756893396378, "learning_rate": 2.5000000000000004e-07, "loss": 0.0803, "num_tokens": 295032.0, "reward": 0.06443203240633011, "reward_std": 0.14603616297245026, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.03120781108736992, "rewards/format_reward_step": 0.06640625, "step": 1 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.2242749134699504, "aux_distill/mean_u": 0.22649021059963792, "aux_distill/n_active_tok": 21.77777777777778, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.45535714285714285, "calib/avg_num_step_conf": 0.3828125, "calib/ece": 0.7616666666666666, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": -0.12250000000000016, "calib/mean_conf": 0.9227777777777777, "calib/mu_c": 0.8275, "calib/mu_w": 0.9500000000000002, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.731111111111111, "calib/std_conf": 0.11882786595910515, "calib/step_conf_rate": 0.08984375, "calib/step_q_c": 0.6353333333333334, "calib/step_q_c_n": 15.0, "calib/step_q_gap": -0.11840160642570263, "calib/step_q_w": 0.753734939759036, "calib/step_q_w_n": 83.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 671.4140625, "completions/mean_terminated_length": 750.576416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.017615454271435738, "learning_rate": 5.000000000000001e-07, "loss": 0.1193, "num_tokens": 594010.0, "reward": 0.04443163797259331, "reward_std": 0.08920705318450928, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.014644531533122063, "rewards/format_reward_step": 0.0546875, "step": 2 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.3238900985036577, "aux_distill/mean_u": 0.29028764932618667, "aux_distill/n_active_tok": 18.857142857142858, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.6666666666666667, "calib/avg_num_step_conf": 0.12890625, "calib/ece": 0.7514285714285714, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": 0.08833333333333326, "calib/mean_conf": 0.8942857142857144, "calib/mu_c": 0.97, "calib/mu_w": 0.8816666666666667, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.02734375, "calib/pce": 0.7514285714285714, "calib/std_conf": 0.16378183597676013, "calib/step_conf_rate": 0.02734375, "calib/step_q_c": 0.87875, "calib/step_q_c_n": 8.0, "calib/step_q_gap": 0.03194999999999992, "calib/step_q_w": 0.8468000000000001, "calib/step_q_w_n": 25.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2943.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 702.1953125, "completions/mean_terminated_length": 802.5089721679688, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0032, "grad_norm": 0.008602521382272243, "learning_rate": 7.5e-07, "loss": 0.0409, "num_tokens": 902836.0, "reward": 0.017878906801342964, "reward_std": 0.04549115151166916, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.008414062671363354, "rewards/format_reward_step": 0.0234375, "step": 3 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.3307591259479523, "aux_distill/mean_u": 0.2824160253664306, "aux_distill/n_active_tok": 20.4, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.09090909090909094, "calib/avg_num_step_conf": 0.19921875, "calib/ece": 0.8541666666666666, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": -0.040909090909091006, "calib/mean_conf": 0.9375, "calib/mu_c": 0.9, "calib/mu_w": 0.940909090909091, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.8541666666666666, "calib/std_conf": 0.06546309392830946, "calib/step_conf_rate": 0.04296875, "calib/step_q_c": 0.8671428571428571, "calib/step_q_c_n": 7.0, "calib/step_q_gap": 0.10123376623376612, "calib/step_q_w": 0.765909090909091, "calib/step_q_w_n": 44.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 749.94140625, "completions/mean_terminated_length": 842.0394897460938, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.004266666666666667, "grad_norm": 0.009631999768316746, "learning_rate": 1.0000000000000002e-06, "loss": 0.043, "num_tokens": 1224797.0, "reward": 0.025254102423787117, "reward_std": 0.055353712290525436, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.007539453450590372, "rewards/format_reward_step": 0.03515625, "step": 4 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.2394245167573292, "aux_distill/mean_u": 0.3190632179208172, "aux_distill/n_active_tok": 20.0, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.7857142857142857, "calib/avg_num_step_conf": 0.23828125, "calib/ece": 0.85, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.017142857142857015, "calib/mean_conf": 0.975, "calib/mu_c": 0.99, "calib/mu_w": 0.972857142857143, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.85, "calib/std_conf": 0.016583123951777013, "calib/step_conf_rate": 0.05078125, "calib/step_q_c": 0.8677777777777779, "calib/step_q_c_n": 9.0, "calib/step_q_gap": 0.054508547008547126, "calib/step_q_w": 0.8132692307692307, "calib/step_q_w_n": 52.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 2903.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 791.6171875, "completions/mean_terminated_length": 884.9519653320312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.010286133736371994, "learning_rate": 1.25e-06, "loss": 0.0523, "num_tokens": 1557947.0, "reward": 0.020259374752640724, "reward_std": 0.04438989982008934, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.005362499970942736, "rewards/format_reward_step": 0.03125, "step": 5 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.355362464984258, "aux_distill/mean_u": 0.2270808669147952, "aux_distill/n_active_tok": 21.333333333333332, "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.6590909090909092, "calib/avg_num_step_conf": 0.25, "calib/ece": 0.796923076923077, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.8461538461538461, "calib/gap": 0.02863636363636357, "calib/mean_conf": 0.9507692307692307, "calib/mu_c": 0.975, "calib/mu_w": 0.9463636363636364, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.796923076923077, "calib/std_conf": 0.04322775790223728, "calib/step_conf_rate": 0.05859375, "calib/step_q_c": 0.8838461538461538, "calib/step_q_c_n": 13.0, "calib/step_q_gap": 0.03992458521870268, "calib/step_q_w": 0.8439215686274512, "calib/step_q_w_n": 51.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3071.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 666.00390625, "completions/mean_terminated_length": 722.4449462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.014194191433489323, "learning_rate": 1.5e-06, "loss": 0.0803, "num_tokens": 1858204.0, "reward": 0.02712968736886978, "reward_std": 0.049767978489398956, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.011290624737739563, "rewards/format_reward_step": 0.03515625, "step": 6 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.3300063119215124, "aux_distill/mean_u": 0.376707364389986, "aux_distill/n_active_tok": 27.764705882352942, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.625, "calib/avg_num_step_conf": 0.4609375, "calib/ece": 0.7336842105263157, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.7368421052631579, "calib/gap": 0.08125000000000004, "calib/mean_conf": 0.8915789473684211, "calib/mu_c": 0.96, "calib/mu_w": 0.8787499999999999, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.7336842105263157, "calib/std_conf": 0.14808472531823183, "calib/step_conf_rate": 0.08984375, "calib/step_q_c": 0.8224999999999998, "calib/step_q_c_n": 20.0, "calib/step_q_gap": 0.03698979591836704, "calib/step_q_w": 0.7855102040816327, "calib/step_q_w_n": 98.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 744.8203125, "completions/mean_terminated_length": 821.8706665039062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.007466666666666667, "grad_norm": 0.016747502610087395, "learning_rate": 1.75e-06, "loss": 0.1105, "num_tokens": 2180110.0, "reward": 0.04752578213810921, "reward_std": 0.10274682939052582, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.020832812413573265, "rewards/format_reward_step": 0.0625, "step": 7 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.3433888256549835, "aux_distill/mean_u": 0.26888269331964815, "aux_distill/n_active_tok": 23.666666666666668, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.7708333333333333, "calib/avg_num_step_conf": 0.27734375, "calib/ece": 0.357, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": 0.036666666666666625, "calib/mean_conf": 0.9570000000000001, "calib/mu_c": 0.9716666666666667, "calib/mu_w": 0.935, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.08984375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.357, "calib/std_conf": 0.03606937759374285, "calib/step_conf_rate": 0.05859375, "calib/step_q_c": 0.8213636363636365, "calib/step_q_c_n": 22.0, "calib/step_q_gap": 0.1434044526901671, "calib/step_q_w": 0.6779591836734694, "calib/step_q_w_n": 49.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 616.0390625, "completions/mean_terminated_length": 694.7400512695312, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.008533333333333334, "grad_norm": 0.01233338937163353, "learning_rate": 2.0000000000000003e-06, "loss": 0.079, "num_tokens": 2468136.0, "reward": 0.03779961168766022, "reward_std": 0.09547053277492523, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.020911719650030136, "rewards/format_reward_step": 0.03125, "step": 8 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.2926430225372314, "aux_distill/mean_u": 0.2499272264449756, "aux_distill/n_active_tok": 17.6, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.7222222222222222, "calib/avg_num_step_conf": 0.171875, "calib/ece": 0.663, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": 0.24111111111111116, "calib/mean_conf": 0.763, "calib/mu_c": 0.98, "calib/mu_w": 0.7388888888888888, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.663, "calib/std_conf": 0.31752322749682427, "calib/step_conf_rate": 0.04296875, "calib/step_q_c": 0.8866666666666667, "calib/step_q_c_n": 3.0, "calib/step_q_gap": 0.0964227642276424, "calib/step_q_w": 0.7902439024390243, "calib/step_q_w_n": 41.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 700.2109375, "completions/mean_terminated_length": 800.2410888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.010681779123842716, "learning_rate": 2.25e-06, "loss": 0.0393, "num_tokens": 2778734.0, "reward": 0.025644725188612938, "reward_std": 0.043472521007061005, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.0122269531711936, "rewards/format_reward_step": 0.03515625, "step": 9 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.3319018862464211, "aux_distill/mean_u": 0.20504655473969555, "aux_distill/n_active_tok": 17.454545454545453, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.8076923076923077, "calib/avg_num_step_conf": 0.19140625, "calib/ece": 0.7885714285714284, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": 0.11846153846153828, "calib/mean_conf": 0.8600000000000001, "calib/mu_c": 0.97, "calib/mu_w": 0.8515384615384617, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.7885714285714284, "calib/std_conf": 0.24848110017234135, "calib/step_conf_rate": 0.05078125, "calib/step_q_c": 0.754, "calib/step_q_c_n": 5.0, "calib/step_q_gap": -0.06918181818181812, "calib/step_q_w": 0.8231818181818181, "calib/step_q_w_n": 44.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 648.3046875, "completions/mean_terminated_length": 697.336181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.010666666666666666, "grad_norm": 0.013561662286520004, "learning_rate": 2.5e-06, "loss": 0.0141, "num_tokens": 3075308.0, "reward": 0.02494199201464653, "reward_std": 0.0662628710269928, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.006915234960615635, "rewards/format_reward_step": 0.03515625, "step": 10 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 1.142171167410337, "aux_distill/mean_u": 0.24344688370614387, "aux_distill/n_active_tok": 18.76923076923077, "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.4875, "calib/avg_num_step_conf": 0.2421875, "calib/ece": 0.662857142857143, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.7857142857142857, "calib/gap": 0.012500000000000067, "calib/mean_conf": 0.9485714285714286, "calib/mu_c": 0.9575, "calib/mu_w": 0.945, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.662857142857143, "calib/std_conf": 0.06322941707523065, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.9633333333333333, "calib/step_q_c_n": 3.0, "calib/step_q_gap": 0.2759195402298851, "calib/step_q_w": 0.6874137931034482, "calib/step_q_w_n": 58.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3046.0, "completions/max_terminated_length": 3046.0, "completions/mean_length": 745.5625, "completions/mean_terminated_length": 808.7457885742188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.011814948171377182, "learning_rate": 2.7500000000000004e-06, "loss": 0.0691, "num_tokens": 3394460.0, "reward": 0.034773632884025574, "reward_std": 0.07486072927713394, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.010953515768051147, "rewards/format_reward_step": 0.03515625, "step": 11 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.3093594085602533, "aux_distill/mean_u": 0.370681781070378, "aux_distill/n_active_tok": 23.428571428571427, "calib/answer_extract_rate": 0.1796875, "calib/auroc": 0.5755208333333333, "calib/avg_num_step_conf": 0.49609375, "calib/ece": 0.6875, "calib/final_conf_rate": 0.125, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.8125, "calib/gap": -0.015833333333333255, "calib/mean_conf": 0.893125, "calib/mu_c": 0.88125, "calib/mu_w": 0.8970833333333332, "calib/nonempty_final_conf_rate": 0.125, "calib/nonempty_reasoning_rate": 0.203125, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.6653125, "calib/std_conf": 0.17207261366934598, "calib/step_conf_rate": 0.1171875, "calib/step_q_c": 0.7972727272727274, "calib/step_q_c_n": 33.0, "calib/step_q_gap": 0.0195067698259187, "calib/step_q_w": 0.7777659574468087, "calib/step_q_w_n": 94.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 742.7421875, "completions/mean_terminated_length": 795.5731811523438, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0128, "grad_norm": 0.01771925762295723, "learning_rate": 3e-06, "loss": 0.1562, "num_tokens": 3712586.0, "reward": 0.07837206870317459, "reward_std": 0.16047851741313934, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.03565039113163948, "rewards/format_reward_step": 0.0859375, "step": 12 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 1.1611148168643315, "aux_distill/mean_u": 0.23878970391290485, "aux_distill/n_active_tok": 26.333333333333332, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.5727272727272728, "calib/avg_num_step_conf": 0.32421875, "calib/ece": 0.574375, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.10636363636363633, "calib/mean_conf": 0.8868750000000001, "calib/mu_c": 0.96, "calib/mu_w": 0.8536363636363636, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.574375, "calib/std_conf": 0.16559433678420285, "calib/step_conf_rate": 0.07421875, "calib/step_q_c": 0.6991176470588236, "calib/step_q_c_n": 17.0, "calib/step_q_gap": -0.08073083778966128, "calib/step_q_w": 0.7798484848484849, "calib/step_q_w_n": 66.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2675.0, "completions/max_terminated_length": 2675.0, "completions/mean_length": 697.2421875, "completions/mean_terminated_length": 753.1392211914062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.013866666666666666, "grad_norm": 0.00980466604232788, "learning_rate": 3.2500000000000002e-06, "loss": 0.0487, "num_tokens": 4019480.0, "reward": 0.0430777333676815, "reward_std": 0.10306745767593384, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.019749218598008156, "rewards/format_reward_step": 0.046875, "step": 13 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.1014714141686757, "aux_distill/mean_u": 0.2636372189654983, "aux_distill/n_active_tok": 21.555555555555557, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.6818181818181819, "calib/avg_num_step_conf": 0.3828125, "calib/ece": 0.5940769230769232, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.23076923076923078, "calib/gap": 0.0892272727272726, "calib/mean_conf": 0.671, "calib/mu_c": 0.7464999999999999, "calib/mu_w": 0.6572727272727273, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.15234375, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.5556153846153846, "calib/std_conf": 0.2858477270802115, "calib/step_conf_rate": 0.09765625, "calib/step_q_c": 0.554142857142857, "calib/step_q_c_n": 7.0, "calib/step_q_gap": 0.13351538461538448, "calib/step_q_w": 0.42062747252747257, "calib/step_q_w_n": 91.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 786.0078125, "completions/mean_terminated_length": 856.2467651367188, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.014933333333333333, "grad_norm": 0.010665432550013065, "learning_rate": 3.5e-06, "loss": 0.1084, "num_tokens": 4349906.0, "reward": 0.03924267739057541, "reward_std": 0.08956538140773773, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.023797854781150818, "rewards/format_reward_step": 0.04296875, "step": 14 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.1782517830530803, "aux_distill/mean_u": 0.37329411304752624, "aux_distill/n_active_tok": 25.5, "calib/answer_extract_rate": 0.171875, "calib/auroc": 0.7395833333333333, "calib/avg_num_step_conf": 0.62109375, "calib/ece": 0.7945000000000002, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.6538461538461539, "calib/gap": 0.10679166666666673, "calib/mean_conf": 0.8714230769230771, "calib/mu_c": 0.97, "calib/mu_w": 0.8632083333333332, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.21875, "calib/nonempty_step_conf_rate": 0.140625, "calib/pce": 0.7945000000000002, "calib/std_conf": 0.17340663040223006, "calib/step_conf_rate": 0.140625, "calib/step_q_w": 0.718860759493671, "calib/step_q_w_n": 158.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 665.02734375, "completions/mean_terminated_length": 718.3417358398438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.018191225826740265, "learning_rate": 3.7500000000000005e-06, "loss": 0.1084, "num_tokens": 4651841.0, "reward": 0.04167646914720535, "reward_std": 0.09166643768548965, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.013040433637797832, "rewards/format_reward_step": 0.0625, "step": 15 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.0293893954333138, "aux_distill/mean_u": 0.4007917690219686, "aux_distill/n_active_tok": 22.352941176470587, "calib/answer_extract_rate": 0.12109375, "calib/auroc": 0.1911764705882353, "calib/avg_num_step_conf": 0.375, "calib/ece": 0.7378947368421053, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.5789473684210527, "calib/gap": -0.12735294117647045, "calib/mean_conf": 0.7989473684210526, "calib/mu_c": 0.685, "calib/mu_w": 0.8123529411764705, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.7157894736842105, "calib/std_conf": 0.2579044036856355, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.5181818181818183, "calib/step_q_c_n": 11.0, "calib/step_q_gap": -0.0858181818181818, "calib/step_q_w": 0.6040000000000001, "calib/step_q_w_n": 85.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 2929.0, "completions/max_terminated_length": 2929.0, "completions/mean_length": 628.9453125, "completions/mean_terminated_length": 725.270263671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.011694777756929398, "learning_rate": 4.000000000000001e-06, "loss": 0.0626, "num_tokens": 4945507.0, "reward": 0.044773828238248825, "reward_std": 0.10467442870140076, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01923515647649765, "rewards/format_reward_step": 0.05859375, "step": 16 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 1.0415730786820252, "aux_distill/mean_u": 0.2799439116063525, "aux_distill/n_active_tok": 28.0, "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.6759259259259259, "calib/avg_num_step_conf": 0.65625, "calib/ece": 0.45083333333333336, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.2916666666666667, "calib/gap": 0.16666666666666685, "calib/mean_conf": 0.6516666666666667, "calib/mu_c": 0.7766666666666667, "calib/mu_w": 0.6099999999999999, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.203125, "calib/nonempty_step_conf_rate": 0.14453125, "calib/pce": 0.42625, "calib/std_conf": 0.3062497165531568, "calib/step_conf_rate": 0.14453125, "calib/step_q_c": 0.38839629629629624, "calib/step_q_c_n": 27.0, "calib/step_q_gap": -0.0620292356185973, "calib/step_q_w": 0.45042553191489354, "calib/step_q_w_n": 141.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 688.16015625, "completions/mean_terminated_length": 740.2059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.011651230975985527, "learning_rate": 4.25e-06, "loss": 0.1347, "num_tokens": 5249012.0, "reward": 0.07471387088298798, "reward_std": 0.16528142988681793, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.051771484315395355, "rewards/format_reward_step": 0.07421875, "step": 17 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 1.0446974494877983, "aux_distill/mean_u": 0.33307486253661095, "aux_distill/n_active_tok": 23.529411764705884, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.6176470588235294, "calib/avg_num_step_conf": 0.390625, "calib/ece": 0.5987373737373737, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/gap": 0.2601604278074868, "calib/mean_conf": 0.6542929292929293, "calib/mu_c": 0.9, "calib/mu_w": 0.6398395721925132, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.15234375, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.5987373737373737, "calib/std_conf": 0.3442238585416577, "calib/step_conf_rate": 0.09765625, "calib/step_q_c": 0.6000000000000001, "calib/step_q_c_n": 8.0, "calib/step_q_gap": 0.008249456521739251, "calib/step_q_w": 0.5917505434782608, "calib/step_q_w_n": 92.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 748.33203125, "completions/mean_terminated_length": 801.5606689453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.007760821841657162, "learning_rate": 4.5e-06, "loss": 0.0604, "num_tokens": 5575113.0, "reward": 0.042026352137327194, "reward_std": 0.0855361744761467, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.02936520427465439, "rewards/format_reward_step": 0.046875, "step": 18 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 1.0255909500450924, "aux_distill/mean_u": 0.387873029046073, "aux_distill/n_active_tok": 48.689655172413794, "calib/answer_extract_rate": 0.31640625, "calib/auroc": 0.3815789473684211, "calib/avg_num_step_conf": 1.390625, "calib/ece": 0.39595238095238094, "calib/final_conf_rate": 0.1640625, "calib/format_rate": 0.15234375, "calib/frac_conf_gt_0.9": 0.14285714285714285, "calib/gap": -0.1797368421052632, "calib/mean_conf": 0.47261904761904766, "calib/mu_c": 0.31000000000000005, "calib/mu_w": 0.48973684210526325, "calib/nonempty_final_conf_rate": 0.1640625, "calib/nonempty_reasoning_rate": 0.41796875, "calib/nonempty_step_conf_rate": 0.32421875, "calib/pce": 0.3866666666666666, "calib/std_conf": 0.34127254520892636, "calib/step_conf_rate": 0.32421875, "calib/step_q_c": 0.36454545454545456, "calib/step_q_c_n": 33.0, "calib/step_q_gap": 0.03782997466929355, "calib/step_q_w": 0.326715479876161, "calib/step_q_w_n": 323.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2840.0, "completions/max_terminated_length": 2840.0, "completions/mean_length": 646.0625, "completions/mean_terminated_length": 675.0693359375, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.020266666666666665, "grad_norm": 0.01326060201972723, "learning_rate": 4.75e-06, "loss": 0.1531, "num_tokens": 5869073.0, "reward": 0.14051249623298645, "reward_std": 0.28107649087905884, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.0974312573671341, "rewards/format_reward_step": 0.15234375, "step": 19 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 1.0279449429363012, "aux_distill/mean_u": 0.3457623021266588, "aux_distill/n_active_tok": 70.375, "calib/answer_extract_rate": 0.4140625, "calib/auroc": 0.4934065934065934, "calib/avg_num_step_conf": 2.20703125, "calib/ece": 0.3231595511392405, "calib/final_conf_rate": 0.30859375, "calib/format_rate": 0.2421875, "calib/frac_conf_gt_0.9": 0.1518987341772152, "calib/gap": -0.02933201806593405, "calib/mean_conf": 0.35369108177215186, "calib/mu_c": 0.3295571428571428, "calib/mu_w": 0.3588891609230769, "calib/nonempty_final_conf_rate": 0.30859375, "calib/nonempty_reasoning_rate": 0.578125, "calib/nonempty_step_conf_rate": 0.47265625, "calib/pce": 0.24981772151898735, "calib/std_conf": 0.3633414831600973, "calib/step_conf_rate": 0.47265625, "calib/step_q_c": 0.27234264705882355, "calib/step_q_c_n": 68.0, "calib/step_q_gap": -0.02546191469868564, "calib/step_q_w": 0.2978045617575092, "calib/step_q_w_n": 497.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 3032.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 606.62109375, "completions/mean_terminated_length": 644.3776245117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.013194161467254162, "learning_rate": 5e-06, "loss": 0.2451, "num_tokens": 6153048.0, "reward": 0.23589837551116943, "reward_std": 0.35146909952163696, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.16320298612117767, "rewards/format_reward_step": 0.2421875, "step": 20 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9818312600255013, "aux_distill/mean_u": 0.3322417602864148, "aux_distill/n_active_tok": 90.125, "calib/answer_extract_rate": 0.51953125, "calib/auroc": 0.42969924812030086, "calib/avg_num_step_conf": 2.8203125, "calib/ece": 0.3284403486238532, "calib/final_conf_rate": 0.42578125, "calib/format_rate": 0.36328125, "calib/frac_conf_gt_0.9": 0.12844036697247707, "calib/gap": -0.10269009624060149, "calib/mean_conf": 0.3572862568807339, "calib/mu_c": 0.26778571428571424, "calib/mu_w": 0.37047581052631573, "calib/nonempty_final_conf_rate": 0.42578125, "calib/nonempty_reasoning_rate": 0.6484375, "calib/nonempty_step_conf_rate": 0.578125, "calib/pce": 0.27864311926605506, "calib/std_conf": 0.3425272573392338, "calib/step_conf_rate": 0.578125, "calib/step_q_c": 0.3607142857142857, "calib/step_q_c_n": 56.0, "calib/step_q_gap": 0.09586743886743887, "calib/step_q_w": 0.26484684684684684, "calib/step_q_w_n": 666.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 583.8125, "completions/mean_terminated_length": 617.5867309570312, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.0224, "grad_norm": 0.01277672778815031, "learning_rate": 4.9722222222222224e-06, "loss": 0.2213, "num_tokens": 6429272.0, "reward": 0.33688199520111084, "reward_std": 0.37365442514419556, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.2518889605998993, "rewards/format_reward_step": 0.36328125, "step": 21 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9814799521118402, "aux_distill/mean_u": 0.3856371479271701, "aux_distill/n_active_tok": 107.125, "calib/answer_extract_rate": 0.5703125, "calib/auroc": 0.5931578947368421, "calib/avg_num_step_conf": 3.39453125, "calib/ece": 0.2696144017687075, "calib/final_conf_rate": 0.41015625, "calib/format_rate": 0.359375, "calib/frac_conf_gt_0.9": 0.11428571428571428, "calib/gap": 0.1432568040601503, "calib/mean_conf": 0.3313867010884354, "calib/mu_c": 0.46099999999999997, "calib/mu_w": 0.3177431959398497, "calib/nonempty_final_conf_rate": 0.41015625, "calib/nonempty_reasoning_rate": 0.71875, "calib/nonempty_step_conf_rate": 0.62890625, "calib/pce": 0.2528815038095238, "calib/std_conf": 0.3426169310066443, "calib/step_conf_rate": 0.62890625, "calib/step_q_c": 0.21620727272727272, "calib/step_q_c_n": 55.0, "calib/step_q_gap": -0.033706065082368813, "calib/step_q_w": 0.24991333780964153, "calib/step_q_w_n": 809.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 554.1015625, "completions/mean_terminated_length": 574.29150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.023466666666666667, "grad_norm": 0.01487016212195158, "learning_rate": 4.944444444444445e-06, "loss": 0.1965, "num_tokens": 6696746.0, "reward": 0.3352200388908386, "reward_std": 0.42807674407958984, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.26809632778167725, "rewards/format_reward_step": 0.359375, "step": 22 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9744270667433739, "aux_distill/mean_u": 0.31499481351199665, "aux_distill/n_active_tok": 121.125, "calib/answer_extract_rate": 0.62109375, "calib/auroc": 0.4381720430107527, "calib/avg_num_step_conf": 3.79296875, "calib/ece": 0.31510691602159824, "calib/final_conf_rate": 0.53125, "calib/format_rate": 0.421875, "calib/frac_conf_gt_0.9": 0.11764705882352941, "calib/gap": -0.04729891960110766, "calib/mean_conf": 0.33540048551865703, "calib/mu_c": 0.292275, "calib/mu_w": 0.33957391960110767, "calib/nonempty_final_conf_rate": 0.53125, "calib/nonempty_reasoning_rate": 0.828125, "calib/nonempty_step_conf_rate": 0.74609375, "calib/pce": 0.2811360537113041, "calib/std_conf": 0.34317791131441383, "calib/step_conf_rate": 0.74609375, "calib/step_q_c": 0.3123102040816327, "calib/step_q_c_n": 49.0, "calib/step_q_gap": 0.05884855061162214, "calib/step_q_w": 0.25346165347001054, "calib/step_q_w_n": 922.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2973.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 566.89453125, "completions/mean_terminated_length": 580.5, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.024533333333333334, "grad_norm": 0.011434352025389671, "learning_rate": 4.9166666666666665e-06, "loss": 0.2523, "num_tokens": 6969615.0, "reward": 0.39768069982528687, "reward_std": 0.47356581687927246, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.31489264965057373, "rewards/format_reward_step": 0.421875, "step": 23 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9627590533345938, "aux_distill/mean_u": 0.3975181422269218, "aux_distill/n_active_tok": 141.125, "calib/answer_extract_rate": 0.6796875, "calib/auroc": 0.39983022071307295, "calib/avg_num_step_conf": 4.6875, "calib/ece": 0.27329715482517486, "calib/final_conf_rate": 0.55859375, "calib/format_rate": 0.44921875, "calib/frac_conf_gt_0.9": 0.06993006993006994, "calib/gap": -0.12481586148556881, "calib/mean_conf": 0.3192845674125875, "calib/mu_c": 0.21105263157894735, "calib/mu_w": 0.33586849306451616, "calib/nonempty_final_conf_rate": 0.55859375, "calib/nonempty_reasoning_rate": 0.890625, "calib/nonempty_step_conf_rate": 0.7890625, "calib/pce": 0.22985729468531468, "calib/std_conf": 0.31422027889339454, "calib/step_conf_rate": 0.7890625, "calib/step_q_c": 0.2549627617364461, "calib/step_q_c_n": 115.0, "calib/step_q_gap": 0.005681280665347288, "calib/step_q_w": 0.24928148107109882, "calib/step_q_w_n": 1083.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3022.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 565.36328125, "completions/mean_terminated_length": 576.62548828125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0256, "grad_norm": 0.012067703530192375, "learning_rate": 4.888888888888889e-06, "loss": 0.3338, "num_tokens": 7242668.0, "reward": 0.43442392349243164, "reward_std": 0.43748435378074646, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.3258790969848633, "rewards/format_reward_step": 0.44921875, "step": 24 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9440481103956699, "aux_distill/mean_u": 0.35665699968748665, "aux_distill/n_active_tok": 139.125, "calib/answer_extract_rate": 0.77734375, "calib/auroc": 0.44574780058651026, "calib/avg_num_step_conf": 4.61328125, "calib/ece": 0.29719062486867937, "calib/final_conf_rate": 0.69140625, "calib/format_rate": 0.6015625, "calib/frac_conf_gt_0.9": 0.11299435028248588, "calib/gap": -0.059031992152093093, "calib/mean_conf": 0.34487650057489405, "calib/mu_c": 0.2931818181818182, "calib/mu_w": 0.3522138103339113, "calib/nonempty_final_conf_rate": 0.69140625, "calib/nonempty_reasoning_rate": 0.9296875, "calib/nonempty_step_conf_rate": 0.875, "calib/pce": 0.2588866700664195, "calib/std_conf": 0.3256446003305085, "calib/step_conf_rate": 0.875, "calib/step_q_c": 0.24164233576642338, "calib/step_q_c_n": 137.0, "calib/step_q_gap": -0.029769908720633148, "calib/step_q_w": 0.27141224448705653, "calib/step_q_w_n": 1043.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 507.72265625, "completions/mean_terminated_length": 515.7817993164062, "completions/min_length": 0.0, "completions/min_terminated_length": 22.0, "epoch": 0.02666666666666667, "grad_norm": 0.011941850185394287, "learning_rate": 4.861111111111111e-06, "loss": 0.242, "num_tokens": 7499677.0, "reward": 0.5643470883369446, "reward_std": 0.4552688002586365, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.429475337266922, "rewards/format_reward_step": 0.6015625, "step": 25 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9647899996489286, "aux_distill/mean_u": 0.43523350339639616, "aux_distill/n_active_tok": 147.0, "calib/answer_extract_rate": 0.77734375, "calib/auroc": 0.47863924050632906, "calib/avg_num_step_conf": 4.59375, "calib/ece": 0.28037136764367815, "calib/final_conf_rate": 0.6796875, "calib/format_rate": 0.58984375, "calib/frac_conf_gt_0.9": 0.06896551724137931, "calib/gap": -0.0215346074050633, "calib/mean_conf": 0.3493794136206897, "calib/mu_c": 0.329825, "calib/mu_w": 0.3513596074050633, "calib/nonempty_final_conf_rate": 0.6796875, "calib/nonempty_reasoning_rate": 0.9453125, "calib/nonempty_step_conf_rate": 0.87890625, "calib/pce": 0.268898379137931, "calib/std_conf": 0.3015161495424351, "calib/step_conf_rate": 0.87890625, "calib/step_q_c": 0.2793808823529412, "calib/step_q_c_n": 68.0, "calib/step_q_gap": -0.004022890408791668, "calib/step_q_w": 0.28340377276173284, "calib/step_q_w_n": 1108.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2232.0, "completions/max_terminated_length": 2232.0, "completions/mean_length": 458.015625, "completions/mean_terminated_length": 465.2857360839844, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.027733333333333332, "grad_norm": 0.012656077742576599, "learning_rate": 4.833333333333333e-06, "loss": 0.1847, "num_tokens": 7745977.0, "reward": 0.5486533641815186, "reward_std": 0.44643837213516235, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.4449630379676819, "rewards/format_reward_step": 0.58984375, "step": 26 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9289473351091146, "aux_distill/mean_u": 0.37751402623273483, "aux_distill/n_active_tok": 146.75, "calib/answer_extract_rate": 0.859375, "calib/auroc": 0.4695383150880533, "calib/avg_num_step_conf": 4.5859375, "calib/ece": 0.32406237623762374, "calib/final_conf_rate": 0.7890625, "calib/format_rate": 0.71875, "calib/frac_conf_gt_0.9": 0.054455445544554455, "calib/gap": 0.004386673012851006, "calib/mean_conf": 0.3522158415841585, "calib/mu_c": 0.3563636363636363, "calib/mu_w": 0.3519769633507853, "calib/nonempty_final_conf_rate": 0.7890625, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.93359375, "calib/pce": 0.31091138613861385, "calib/std_conf": 0.2945769718143318, "calib/step_conf_rate": 0.93359375, "calib/step_q_c": 0.25133333333333335, "calib/step_q_c_n": 60.0, "calib/step_q_gap": -0.06902260023937762, "calib/step_q_w": 0.32035593357271097, "calib/step_q_w_n": 1114.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2577.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 423.5546875, "completions/mean_terminated_length": 425.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.0288, "grad_norm": 0.010853436775505543, "learning_rate": 4.805555555555556e-06, "loss": 0.2049, "num_tokens": 7983431.0, "reward": 0.6602060198783875, "reward_std": 0.41780614852905273, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.5508807897567749, "rewards/format_reward_step": 0.71875, "step": 27 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9501634556800127, "aux_distill/mean_u": 0.3617846596280876, "aux_distill/n_active_tok": 118.25, "calib/answer_extract_rate": 0.8359375, "calib/auroc": 0.6288407821229051, "calib/avg_num_step_conf": 3.703125, "calib/ece": 0.29103288901767604, "calib/final_conf_rate": 0.79296875, "calib/format_rate": 0.671875, "calib/frac_conf_gt_0.9": 0.09359605911330049, "calib/gap": 0.15580383256654617, "calib/mean_conf": 0.38630382497826715, "calib/mu_c": 0.5236875, "calib/mu_w": 0.3678836674334538, "calib/nonempty_final_conf_rate": 0.79296875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.90234375, "calib/pce": 0.2795550565053608, "calib/std_conf": 0.2957872801567985, "calib/step_conf_rate": 0.90234375, "calib/step_q_c": 0.38362744047619046, "calib/step_q_c_n": 84.0, "calib/step_q_gap": 0.03731586640211637, "calib/step_q_w": 0.3463115740740741, "calib/step_q_w_n": 864.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2702.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 391.390625, "completions/mean_terminated_length": 394.4724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.029866666666666666, "grad_norm": 0.011087479069828987, "learning_rate": 4.777777777777778e-06, "loss": 0.1907, "num_tokens": 8214379.0, "reward": 0.6477793455123901, "reward_std": 0.4602370262145996, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.518215000629425, "rewards/format_reward_step": 0.671875, "step": 28 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9244299307465553, "aux_distill/mean_u": 0.3844669224833983, "aux_distill/n_active_tok": 126.375, "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.540199530516432, "calib/avg_num_step_conf": 3.97265625, "calib/ece": 0.37245397873303165, "calib/final_conf_rate": 0.86328125, "calib/format_rate": 0.77734375, "calib/frac_conf_gt_0.9": 0.08144796380090498, "calib/gap": 0.02993037887323935, "calib/mean_conf": 0.4086530737556562, "calib/mu_c": 0.4375, "calib/mu_w": 0.40756962112676065, "calib/nonempty_final_conf_rate": 0.86328125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.37245397873303165, "calib/std_conf": 0.29742238264250437, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.40611111111111114, "calib/step_q_c_n": 18.0, "calib/step_q_gap": 0.0017035261261261203, "calib/step_q_w": 0.404407584984985, "calib/step_q_w_n": 999.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 363.75390625, "completions/mean_terminated_length": 365.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.030933333333333334, "grad_norm": 0.01006327848881483, "learning_rate": 4.75e-06, "loss": 0.1764, "num_tokens": 8438436.0, "reward": 0.6926558017730713, "reward_std": 0.38272884488105774, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.5767179727554321, "rewards/format_reward_step": 0.77734375, "step": 29 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.963107381016016, "aux_distill/mean_u": 0.35260435378325244, "aux_distill/n_active_tok": 113.625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4605082417582418, "calib/avg_num_step_conf": 3.55078125, "calib/ece": 0.3415881856540084, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.04219409282700422, "calib/gap": -0.04613557692307696, "calib/mean_conf": 0.38668185654008436, "calib/mu_c": 0.34307692307692306, "calib/mu_w": 0.3892125, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.33670886075949363, "calib/std_conf": 0.27919238238117394, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.4132608695652174, "calib/step_q_c_n": 46.0, "calib/step_q_gap": 0.0017828417001836683, "calib/step_q_w": 0.41147802786503374, "calib/step_q_w_n": 863.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2993.0, "completions/max_terminated_length": 2993.0, "completions/mean_length": 326.765625, "completions/mean_terminated_length": 326.765625, "completions/min_length": 1.0, "completions/min_terminated_length": 1.0, "epoch": 0.032, "grad_norm": 0.011806800030171871, "learning_rate": 4.722222222222222e-06, "loss": 0.2685, "num_tokens": 8652880.0, "reward": 0.8086734414100647, "reward_std": 0.2870190441608429, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.6681281924247742, "rewards/format_reward_step": 0.88671875, "step": 30 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9272862039506435, "aux_distill/mean_u": 0.33398219030968823, "aux_distill/n_active_tok": 120.875, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5596361185983827, "calib/avg_num_step_conf": 3.77734375, "calib/ece": 0.3165919930890386, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.07296137339055794, "calib/gap": 0.046519626643156786, "calib/mean_conf": 0.40672074845384554, "calib/mu_c": 0.44904761904761903, "calib/mu_w": 0.40252799240446224, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3165919930890386, "calib/std_conf": 0.2837682946800375, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.48237500000000005, "calib/step_q_c_n": 80.0, "calib/step_q_gap": 0.053977691069966305, "calib/step_q_w": 0.42839730893003375, "calib/step_q_w_n": 887.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 300.38671875, "completions/mean_terminated_length": 302.751953125, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.03306666666666667, "grad_norm": 0.011283627711236477, "learning_rate": 4.694444444444445e-06, "loss": 0.238, "num_tokens": 8859499.0, "reward": 0.8242871761322021, "reward_std": 0.3063260316848755, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.6681056022644043, "rewards/format_reward_step": 0.8984375, "step": 31 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.979875098913908, "aux_distill/mean_u": 0.366037572053497, "aux_distill/n_active_tok": 107.5, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5059424520433695, "calib/avg_num_step_conf": 3.359375, "calib/ece": 0.31686250000000005, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.058333333333333334, "calib/gap": 0.006073811509591465, "calib/mean_conf": 0.3849375, "calib/mu_c": 0.39045454545454555, "calib/mu_w": 0.3843807339449541, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.3050666666666667, "calib/std_conf": 0.282678896619026, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.4355492537313433, "calib/step_q_c_n": 67.0, "calib/step_q_gap": 0.03124660555984271, "calib/step_q_w": 0.4043026481715006, "calib/step_q_w_n": 793.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2144.0, "completions/max_terminated_length": 2144.0, "completions/mean_length": 267.640625, "completions/mean_terminated_length": 267.640625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.034133333333333335, "grad_norm": 0.011885612271726131, "learning_rate": 4.666666666666667e-06, "loss": 0.113, "num_tokens": 9058527.0, "reward": 0.833459734916687, "reward_std": 0.2733987271785736, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.678638219833374, "rewards/format_reward_step": 0.90234375, "step": 32 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9799461141228676, "aux_distill/mean_u": 0.37253693561336365, "aux_distill/n_active_tok": 98.375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5056140350877192, "calib/avg_num_step_conf": 3.07421875, "calib/ece": 0.2963055922373021, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.03278688524590164, "calib/gap": 0.020465030552545727, "calib/mean_conf": 0.3548127684184357, "calib/mu_c": 0.37368421052631584, "calib/mu_w": 0.3532191799737701, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2866247540983607, "calib/std_conf": 0.25139900802320464, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.5187301587301587, "calib/step_q_c_n": 63.0, "calib/step_q_gap": 0.13456536591247914, "calib/step_q_w": 0.38416479281767957, "calib/step_q_w_n": 724.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1467.0, "completions/max_terminated_length": 1467.0, "completions/mean_length": 244.12890625, "completions/mean_terminated_length": 244.12890625, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.0352, "grad_norm": 0.01415384653955698, "learning_rate": 4.638888888888889e-06, "loss": 0.1471, "num_tokens": 9251704.0, "reward": 0.867424726486206, "reward_std": 0.2542864680290222, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.7348494529724121, "rewards/format_reward_step": 0.921875, "step": 33 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9852313920855522, "aux_distill/mean_u": 0.362606382446605, "aux_distill/n_active_tok": 95.25, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5539965986394558, "calib/avg_num_step_conf": 2.9765625, "calib/ece": 0.2839067601360692, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.02040816326530612, "calib/gap": 0.06695019538688857, "calib/mean_conf": 0.34450267850341615, "calib/mu_c": 0.4057142857142857, "calib/mu_w": 0.3387640903273971, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.27134757646259977, "calib/std_conf": 0.24820097732672813, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.3980327868852459, "calib/step_q_c_n": 61.0, "calib/step_q_gap": 0.013356609995088975, "calib/step_q_w": 0.38467617689015693, "calib/step_q_w_n": 701.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2447.0, "completions/max_terminated_length": 2447.0, "completions/mean_length": 223.28125, "completions/mean_terminated_length": 223.28125, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "epoch": 0.03626666666666667, "grad_norm": 0.013528089970350266, "learning_rate": 4.611111111111112e-06, "loss": 0.1617, "num_tokens": 9437784.0, "reward": 0.8848718404769897, "reward_std": 0.24772733449935913, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.7541186809539795, "rewards/format_reward_step": 0.93359375, "step": 34 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9460540004074574, "aux_distill/mean_u": 0.39174463892047273, "aux_distill/n_active_tok": 95.5, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4527986633249791, "calib/avg_num_step_conf": 2.984375, "calib/ece": 0.23544979919678713, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": -0.04850814536340847, "calib/mean_conf": 0.3087028112449799, "calib/mu_c": 0.2642857142857143, "calib/mu_w": 0.31279385964912276, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.22990763052208835, "calib/std_conf": 0.2189346054831001, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.3818421052631578, "calib/step_q_c_n": 57.0, "calib/step_q_gap": 0.024514948261743386, "calib/step_q_w": 0.35732715700141443, "calib/step_q_w_n": 707.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1673.0, "completions/max_terminated_length": 1673.0, "completions/mean_length": 215.12890625, "completions/mean_terminated_length": 215.12890625, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.037333333333333336, "grad_norm": 0.012646835297346115, "learning_rate": 4.583333333333333e-06, "loss": 0.1052, "num_tokens": 9625921.0, "reward": 0.9041399955749512, "reward_std": 0.18901212513446808, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.7770299911499023, "rewards/format_reward_step": 0.94921875, "step": 35 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9608928207308054, "aux_distill/mean_u": 0.3588672235356567, "aux_distill/n_active_tok": 91.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4842995169082125, "calib/avg_num_step_conf": 2.8515625, "calib/ece": 0.1956403162055336, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.013333333333333308, "calib/mean_conf": 0.29721343873517786, "calib/mu_c": 0.286304347826087, "calib/mu_w": 0.2996376811594203, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.15551778656126483, "calib/std_conf": 0.22247753783059368, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.36039370078740157, "calib/step_q_c_n": 127.0, "calib/step_q_gap": 0.017015591334665237, "calib/step_q_w": 0.34337810945273634, "calib/step_q_w_n": 603.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 752.0, "completions/max_terminated_length": 752.0, "completions/mean_length": 185.0703125, "completions/mean_terminated_length": 185.7960968017578, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.0384, "grad_norm": 0.01309342123568058, "learning_rate": 4.555555555555556e-06, "loss": 0.1108, "num_tokens": 9799819.0, "reward": 0.9677448272705078, "reward_std": 0.16143733263015747, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.7714272737503052, "rewards/format_reward_step": 0.984375, "step": 36 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9541783407330513, "aux_distill/mean_u": 0.3490026414154334, "aux_distill/n_active_tok": 93.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5971202710333146, "calib/avg_num_step_conf": 2.9140625, "calib/ece": 0.19826377952755905, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.08735610766045548, "calib/mean_conf": 0.2888149606299213, "calib/mu_c": 0.3682608695652174, "calib/mu_w": 0.28090476190476193, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.19826377952755905, "calib/std_conf": 0.21542403070346172, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.37949152542372877, "calib/step_q_c_n": 59.0, "calib/step_q_gap": 0.046236794710482754, "calib/step_q_w": 0.333254730713246, "calib/step_q_w_n": 687.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 519.0, "completions/max_terminated_length": 519.0, "completions/mean_length": 186.54296875, "completions/mean_terminated_length": 187.27452087402344, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.039466666666666664, "grad_norm": 0.01288458239287138, "learning_rate": 4.527777777777778e-06, "loss": 0.1158, "num_tokens": 9978478.0, "reward": 0.9314919710159302, "reward_std": 0.1761791706085205, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.8122028112411499, "rewards/format_reward_step": 0.9609375, "step": 37 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9978272467851639, "aux_distill/mean_u": 0.41385125474943524, "aux_distill/n_active_tok": 93.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5765827093260721, "calib/avg_num_step_conf": 2.91796875, "calib/ece": 0.14998690476190477, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.019804935330156537, "calib/mean_conf": 0.25316150793650793, "calib/mu_c": 0.27092307692307693, "calib/mu_w": 0.2511181415929204, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.14998690476190477, "calib/std_conf": 0.20493503148430275, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.3125, "calib/step_q_c_n": 72.0, "calib/step_q_gap": 0.02171377777777772, "calib/step_q_w": 0.2907862222222223, "calib/step_q_w_n": 675.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 542.0, "completions/max_terminated_length": 542.0, "completions/mean_length": 180.4921875, "completions/mean_terminated_length": 181.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.04053333333333333, "grad_norm": 0.013265673071146011, "learning_rate": 4.5e-06, "loss": 0.0651, "num_tokens": 10155380.0, "reward": 0.9324043989181519, "reward_std": 0.16627129912376404, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.8062150478363037, "rewards/format_reward_step": 0.953125, "step": 38 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9659969937056303, "aux_distill/mean_u": 0.3698160120751096, "aux_distill/n_active_tok": 103.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4778632478632479, "calib/avg_num_step_conf": 3.234375, "calib/ece": 0.1762390438247012, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015045470085470053, "calib/mean_conf": 0.23882071713147413, "calib/mu_c": 0.2523076923076923, "calib/mu_w": 0.23726222222222224, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.15573705179282873, "calib/std_conf": 0.1808239985600988, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3414432989690722, "calib/step_q_c_n": 97.0, "calib/step_q_gap": 0.04963755341503667, "calib/step_q_w": 0.29180574555403554, "calib/step_q_w_n": 731.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 194.640625, "completions/mean_terminated_length": 195.4039306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.0416, "grad_norm": 0.011580703780055046, "learning_rate": 4.472222222222223e-06, "loss": 0.0894, "num_tokens": 10335104.0, "reward": 0.9487248063087463, "reward_std": 0.14561089873313904, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.8271371126174927, "rewards/format_reward_step": 0.96484375, "step": 39 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9587426632642746, "aux_distill/mean_u": 0.3633853945422882, "aux_distill/n_active_tok": 110.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4965065502183406, "calib/avg_num_step_conf": 3.45703125, "calib/ece": 0.15555118110236218, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": 0.0032855895196506613, "calib/mean_conf": 0.22263779527559055, "calib/mu_c": 0.2256, "calib/mu_w": 0.22231441048034933, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13988188976377955, "calib/std_conf": 0.1837077947369346, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31063829787234043, "calib/step_q_c_n": 94.0, "calib/step_q_gap": 0.024195819996234214, "calib/step_q_w": 0.2864424778761062, "calib/step_q_w_n": 791.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 507.0, "completions/max_terminated_length": 507.0, "completions/mean_length": 197.40625, "completions/mean_terminated_length": 198.18040466308594, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.042666666666666665, "grad_norm": 0.012859523296356201, "learning_rate": 4.444444444444444e-06, "loss": 0.1088, "num_tokens": 10516208.0, "reward": 0.9728861451148987, "reward_std": 0.09098789095878601, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.8559284806251526, "rewards/format_reward_step": 0.9921875, "step": 40 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9850291721522808, "aux_distill/mean_u": 0.3810529126961046, "aux_distill/n_active_tok": 105.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5601572598018095, "calib/avg_num_step_conf": 3.3046875, "calib/ece": 0.1174117683764826, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.02264431306585027, "calib/mean_conf": 0.22149019240783113, "calib/mu_c": 0.24022725145447588, "calib/mu_w": 0.2175829383886256, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0831764705882353, "calib/std_conf": 0.1776340592509435, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3362070422535211, "calib/step_q_c_n": 142.0, "calib/step_q_gap": 0.05128942861715746, "calib/step_q_w": 0.28491761363636364, "calib/step_q_w_n": 704.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 437.0, "completions/max_terminated_length": 437.0, "completions/mean_length": 182.26171875, "completions/mean_terminated_length": 182.9764862060547, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.04373333333333333, "grad_norm": 0.01246651541441679, "learning_rate": 4.416666666666667e-06, "loss": 0.0824, "num_tokens": 10693923.0, "reward": 0.9991874694824219, "reward_std": 0.10035333037376404, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8264999985694885, "rewards/format_reward_step": 0.99609375, "step": 41 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9697458352893591, "aux_distill/mean_u": 0.39303880177944706, "aux_distill/n_active_tok": 110.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5225159079784629, "calib/avg_num_step_conf": 3.45703125, "calib/ece": 0.13940944881889764, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011210637950726021, "calib/mean_conf": 0.2240551181102362, "calib/mu_c": 0.23407407407407405, "calib/mu_w": 0.22286343612334802, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12858267716535435, "calib/std_conf": 0.15931599892407236, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2636585365853659, "calib/step_q_c_n": 82.0, "calib/step_q_gap": -0.009936731160586743, "calib/step_q_w": 0.27359526774595266, "calib/step_q_w_n": 803.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1751.0, "completions/max_terminated_length": 1751.0, "completions/mean_length": 197.6328125, "completions/mean_terminated_length": 198.40785217285156, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.0448, "grad_norm": 0.011651011183857918, "learning_rate": 4.388888888888889e-06, "loss": 0.0682, "num_tokens": 10872693.0, "reward": 0.9721810817718506, "reward_std": 0.10417618602514267, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.8545183539390564, "rewards/format_reward_step": 0.984375, "step": 42 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9164891820400953, "aux_distill/mean_u": 0.3497377000059944, "aux_distill/n_active_tok": 127.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4870480302212628, "calib/avg_num_step_conf": 3.9765625, "calib/ece": 0.16513095238095243, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.008839449541284405, "calib/mean_conf": 0.2123531746031746, "calib/mu_c": 0.22, "calib/mu_w": 0.2111605504587156, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12128174603174603, "calib/std_conf": 0.1834661160647659, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2771200000000001, "calib/step_q_c_n": 125.0, "calib/step_q_gap": -0.01632882418812981, "calib/step_q_w": 0.2934488241881299, "calib/step_q_w_n": 893.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2723.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 206.765625, "completions/mean_terminated_length": 207.57647705078125, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.04586666666666667, "grad_norm": 0.011534487828612328, "learning_rate": 4.361111111111112e-06, "loss": 0.1498, "num_tokens": 11054657.0, "reward": 0.9709699153900146, "reward_std": 0.12181063741445541, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8286584615707397, "rewards/format_reward_step": 0.98046875, "step": 43 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9449839890003204, "aux_distill/mean_u": 0.37906744113113067, "aux_distill/n_active_tok": 125.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5441608312627061, "calib/avg_num_step_conf": 3.93359375, "calib/ece": 0.1222142857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004717415857239643, "calib/mean_conf": 0.18826984126984128, "calib/mu_c": 0.19263157894736843, "calib/mu_w": 0.1879141630901288, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1175436507936508, "calib/std_conf": 0.1649879222466796, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33276923076923076, "calib/step_q_c_n": 65.0, "calib/step_q_gap": 0.053753307202351774, "calib/step_q_w": 0.279015923566879, "calib/step_q_w_n": 942.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 551.0, "completions/max_terminated_length": 551.0, "completions/mean_length": 198.57421875, "completions/mean_terminated_length": 200.13780212402344, "completions/min_length": 0.0, "completions/min_terminated_length": 40.0, "epoch": 0.046933333333333334, "grad_norm": 0.012412100099027157, "learning_rate": 4.333333333333334e-06, "loss": 0.0443, "num_tokens": 11235620.0, "reward": 0.9523601531982422, "reward_std": 0.13291436433792114, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.8617515563964844, "rewards/format_reward_step": 0.96875, "step": 44 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9304977301508188, "aux_distill/mean_u": 0.33005481343618503, "aux_distill/n_active_tok": 125.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49855861627162074, "calib/avg_num_step_conf": 3.91796875, "calib/ece": 0.1155418326693227, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0063657911595131345, "calib/mean_conf": 0.18422709163346612, "calib/mu_c": 0.17857142857142858, "calib/mu_w": 0.1849372197309417, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09410756972111552, "calib/std_conf": 0.15502340211174045, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2783673469387756, "calib/step_q_c_n": 98.0, "calib/step_q_gap": -0.016820498365091807, "calib/step_q_w": 0.2951878453038674, "calib/step_q_w_n": 905.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2175.0, "completions/max_terminated_length": 2175.0, "completions/mean_length": 212.3984375, "completions/mean_terminated_length": 212.3984375, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.048, "grad_norm": 0.010993365198373795, "learning_rate": 4.305555555555556e-06, "loss": 0.1596, "num_tokens": 11418850.0, "reward": 0.9715802073478699, "reward_std": 0.10800088942050934, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.8533166646957397, "rewards/format_reward_step": 0.98046875, "step": 45 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9679493922740221, "aux_distill/mean_u": 0.40249134664638886, "aux_distill/n_active_tok": 144.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.477030529953917, "calib/avg_num_step_conf": 4.5390625, "calib/ece": 0.12313725490196079, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.002782258064516152, "calib/mean_conf": 0.17341176470588235, "calib/mu_c": 0.17096774193548386, "calib/mu_w": 0.17375000000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08749019607843136, "calib/std_conf": 0.15773807732460254, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22230769230769235, "calib/step_q_c_n": 104.0, "calib/step_q_gap": -0.04604816780572921, "calib/step_q_w": 0.26835586011342155, "calib/step_q_w_n": 1058.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 736.0, "completions/max_terminated_length": 736.0, "completions/mean_length": 213.00390625, "completions/mean_terminated_length": 213.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.04906666666666667, "grad_norm": 0.010834389366209507, "learning_rate": 4.277777777777778e-06, "loss": 0.1079, "num_tokens": 11601955.0, "reward": 0.9894277453422546, "reward_std": 0.0719679743051529, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8616679906845093, "rewards/format_reward_step": 0.99609375, "step": 46 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9069111328572035, "aux_distill/mean_u": 0.4040764168674257, "aux_distill/n_active_tok": 168.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5417624521072797, "calib/avg_num_step_conf": 5.25390625, "calib/ece": 0.11180708661417323, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002361839080459782, "calib/mean_conf": 0.16756299212598424, "calib/mu_c": 0.16965517241379313, "calib/mu_w": 0.16729333333333335, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08259842519685039, "calib/std_conf": 0.16273949634242862, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.24847328244274808, "calib/step_q_c_n": 131.0, "calib/step_q_gap": -0.0478842134386358, "calib/step_q_w": 0.2963574958813839, "calib/step_q_w_n": 1214.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1203.0, "completions/max_terminated_length": 1203.0, "completions/mean_length": 240.28515625, "completions/mean_terminated_length": 241.2274627685547, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.050133333333333335, "grad_norm": 0.010445504449307919, "learning_rate": 4.25e-06, "loss": 0.0842, "num_tokens": 11793252.0, "reward": 0.9773474335670471, "reward_std": 0.10240879654884338, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.8570386171340942, "rewards/format_reward_step": 0.984375, "step": 47 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9124491214752197, "aux_distill/mean_u": 0.3623996561858781, "aux_distill/n_active_tok": 172.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4587806985067259, "calib/avg_num_step_conf": 5.39453125, "calib/ece": 0.12156250000000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.014392200419597684, "calib/mean_conf": 0.139609375, "calib/mu_c": 0.1272972972972973, "calib/mu_w": 0.14168949771689499, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05832031249999999, "calib/std_conf": 0.1444380400452366, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30833333333333335, "calib/step_q_c_n": 144.0, "calib/step_q_gap": 0.04539073026138507, "calib/step_q_w": 0.2629426030719483, "calib/step_q_w_n": 1237.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 734.0, "completions/max_terminated_length": 734.0, "completions/mean_length": 249.046875, "completions/mean_terminated_length": 250.02354431152344, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.0512, "grad_norm": 0.010337688960134983, "learning_rate": 4.222222222222223e-06, "loss": 0.1028, "num_tokens": 11984504.0, "reward": 0.9982218742370605, "reward_std": 0.05581098794937134, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.8519124984741211, "rewards/format_reward_step": 1.0, "step": 48 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9039432909339666, "aux_distill/mean_u": 0.39276995387199315, "aux_distill/n_active_tok": 187.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4854655148772796, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.10531102362204726, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.017046894282188363, "calib/mean_conf": 0.1284685039370079, "calib/mu_c": 0.11363636363636365, "calib/mu_w": 0.130683257918552, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05192913385826772, "calib/std_conf": 0.1368993717457314, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27117647058823524, "calib/step_q_c_n": 187.0, "calib/step_q_gap": -0.011019375109094176, "calib/step_q_w": 0.2821958456973294, "calib/step_q_w_n": 1348.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1228.0, "completions/max_terminated_length": 1228.0, "completions/mean_length": 258.82421875, "completions/mean_terminated_length": 259.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.05226666666666667, "grad_norm": 0.009132636711001396, "learning_rate": 4.194444444444445e-06, "loss": 0.0703, "num_tokens": 12179107.0, "reward": 0.9893507957458496, "reward_std": 0.06767857074737549, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.8576078414916992, "rewards/format_reward_step": 0.9921875, "step": 49 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8850006274878979, "aux_distill/mean_u": 0.3797831229916548, "aux_distill/n_active_tok": 185.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6007246376811595, "calib/avg_num_step_conf": 5.79296875, "calib/ece": 0.06984251968503936, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03944202898550725, "calib/mean_conf": 0.11511811023622047, "calib/mu_c": 0.15083333333333335, "calib/mu_w": 0.1113913043478261, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04523622047244095, "calib/std_conf": 0.12903253658977257, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33408333333333334, "calib/step_q_c_n": 120.0, "calib/step_q_gap": 0.04620365615064809, "calib/step_q_w": 0.28787967718268526, "calib/step_q_w_n": 1363.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2113.0, "completions/max_terminated_length": 2113.0, "completions/mean_length": 270.01953125, "completions/mean_terminated_length": 270.01953125, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.05333333333333334, "grad_norm": 0.01040023472160101, "learning_rate": 4.166666666666667e-06, "loss": 0.1518, "num_tokens": 12377400.0, "reward": 0.9875878691673279, "reward_std": 0.07120494544506073, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.8931445479393005, "rewards/format_reward_step": 0.98828125, "step": 50 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9002226144075394, "aux_distill/mean_u": 0.33791306730053283, "aux_distill/n_active_tok": 180.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47374512670565305, "calib/avg_num_step_conf": 5.640625, "calib/ece": 0.10523622047244093, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0018153021442495337, "calib/mean_conf": 0.09838582677165356, "calib/mu_c": 0.09684210526315788, "calib/mu_w": 0.09865740740740742, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02700787401574803, "calib/std_conf": 0.10150517206822475, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2531891891891892, "calib/step_q_c_n": 185.0, "calib/step_q_gap": -0.03856299508404354, "calib/step_q_w": 0.29175218427323274, "calib/step_q_w_n": 1259.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 607.0, "completions/max_terminated_length": 607.0, "completions/mean_length": 250.42578125, "completions/mean_terminated_length": 251.40785217285156, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.0544, "grad_norm": 0.0104011669754982, "learning_rate": 4.138888888888889e-06, "loss": 0.1156, "num_tokens": 12574613.0, "reward": 0.9927429556846619, "reward_std": 0.06316731870174408, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8487671613693237, "rewards/format_reward_step": 0.98828125, "step": 51 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9108906034380198, "aux_distill/mean_u": 0.37710724738107515, "aux_distill/n_active_tok": 184.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4271367521367522, "calib/avg_num_step_conf": 5.75, "calib/ece": 0.06745669291338584, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01048290598290598, "calib/mean_conf": 0.07915748031496064, "calib/mu_c": 0.0695, "calib/mu_w": 0.07998290598290599, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03393700787401575, "calib/std_conf": 0.08907043352922596, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.1876635514018692, "calib/step_q_c_n": 107.0, "calib/step_q_gap": -0.08009469035637257, "calib/step_q_w": 0.26775824175824176, "calib/step_q_w_n": 1365.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1690.0, "completions/max_terminated_length": 1690.0, "completions/mean_length": 264.8359375, "completions/mean_terminated_length": 264.8359375, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.055466666666666664, "grad_norm": 0.00995530467480421, "learning_rate": 4.111111111111111e-06, "loss": 0.1265, "num_tokens": 12774171.0, "reward": 0.9905729293823242, "reward_std": 0.04434080421924591, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.9108333587646484, "rewards/format_reward_step": 0.9921875, "step": 52 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9225151222199202, "aux_distill/mean_u": 0.37064661099430596, "aux_distill/n_active_tok": 182.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4493873811977556, "calib/avg_num_step_conf": 5.74609375, "calib/ece": 0.11933070866141732, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006633459292339397, "calib/mean_conf": 0.0601968503937008, "calib/mu_c": 0.05463414634146342, "calib/mu_w": 0.06126760563380282, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009055118110236224, "calib/std_conf": 0.07679839025647278, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23280612244897958, "calib/step_q_c_n": 196.0, "calib/step_q_gap": -0.060803132452981246, "calib/step_q_w": 0.2936092549019608, "calib/step_q_w_n": 1275.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 720.0, "completions/max_terminated_length": 720.0, "completions/mean_length": 256.7265625, "completions/mean_terminated_length": 257.73333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.05653333333333333, "grad_norm": 0.011194716207683086, "learning_rate": 4.083333333333334e-06, "loss": 0.1016, "num_tokens": 12969525.0, "reward": 0.9962138533592224, "reward_std": 0.04443935677409172, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8400839567184448, "rewards/format_reward_step": 0.9921875, "step": 53 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8998712226748466, "aux_distill/mean_u": 0.32628339549880664, "aux_distill/n_active_tok": 176.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46494611457742485, "calib/avg_num_step_conf": 5.54296875, "calib/ece": 0.13329296875000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01152546795235393, "calib/mean_conf": 0.05358203125000001, "calib/mu_c": 0.04390243902439025, "calib/mu_w": 0.05542790697674418, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013359375, "calib/std_conf": 0.07227076575887392, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2622346368715084, "calib/step_q_c_n": 179.0, "calib/step_q_gap": -0.048313750225265795, "calib/step_q_w": 0.3105483870967742, "calib/step_q_w_n": 1240.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 651.0, "completions/max_terminated_length": 651.0, "completions/mean_length": 247.37890625, "completions/mean_terminated_length": 248.34902954101562, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.0576, "grad_norm": 0.011143301613628864, "learning_rate": 4.055555555555556e-06, "loss": 0.1154, "num_tokens": 13162894.0, "reward": 1.0029842853546143, "reward_std": 0.01903366856276989, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8458121418952942, "rewards/format_reward_step": 1.0, "step": 54 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8807304184883833, "aux_distill/mean_u": 0.37027624704653356, "aux_distill/n_active_tok": 185.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47440860215053765, "calib/avg_num_step_conf": 5.83984375, "calib/ece": 0.09199218749999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005567025089605729, "calib/mean_conf": 0.0445703125, "calib/mu_c": 0.03967741935483871, "calib/mu_w": 0.04524444444444444, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007734375000000001, "calib/std_conf": 0.05816707933747701, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2614261744966443, "calib/step_q_c_n": 149.0, "calib/step_q_gap": -0.03416520737556966, "calib/step_q_w": 0.29559138187221395, "calib/step_q_w_n": 1346.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 632.0, "completions/max_terminated_length": 632.0, "completions/mean_length": 252.5703125, "completions/mean_terminated_length": 253.56080627441406, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.058666666666666666, "grad_norm": 0.012026661075651646, "learning_rate": 4.027777777777779e-06, "loss": 0.1282, "num_tokens": 13359184.0, "reward": 0.9982151985168457, "reward_std": 0.024000566452741623, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.879243016242981, "rewards/format_reward_step": 0.99609375, "step": 55 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9121517166495323, "aux_distill/mean_u": 0.37407740327435157, "aux_distill/n_active_tok": 160.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5019558214450068, "calib/avg_num_step_conf": 5.0625, "calib/ece": 0.148300395256917, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01022434422457432, "calib/mean_conf": 0.03320158102766799, "calib/mu_c": 0.024634146341463416, "calib/mu_w": 0.034858490566037736, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009723320158102768, "calib/std_conf": 0.07660552202945613, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.297914691943128, "calib/step_q_c_n": 211.0, "calib/step_q_gap": -0.015171022342586271, "calib/step_q_w": 0.31308571428571424, "calib/step_q_w_n": 1085.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 234.5703125, "completions/mean_terminated_length": 235.49020385742188, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.05973333333333333, "grad_norm": 0.011675729416310787, "learning_rate": 4.000000000000001e-06, "loss": 0.0984, "num_tokens": 13549882.0, "reward": 0.9887820482254028, "reward_std": 0.04803510010242462, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8291265368461609, "rewards/format_reward_step": 0.98828125, "step": 56 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8941332511603832, "aux_distill/mean_u": 0.34353156945045377, "aux_distill/n_active_tok": 161.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47129928093230844, "calib/avg_num_step_conf": 5.03515625, "calib/ece": 0.12662745098039216, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010562856434416061, "calib/mean_conf": 0.01847058823529412, "calib/mu_c": 0.017567567567567572, "calib/mu_w": 0.018623853211009178, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.021337946444714605, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.24946524064171127, "calib/step_q_c_n": 187.0, "calib/step_q_gap": -0.02679610237099289, "calib/step_q_w": 0.27626134301270416, "calib/step_q_w_n": 1102.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1091.0, "completions/max_terminated_length": 1091.0, "completions/mean_length": 234.6171875, "completions/mean_terminated_length": 235.53726196289062, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.0608, "grad_norm": 0.009379012510180473, "learning_rate": 3.972222222222223e-06, "loss": 0.0756, "num_tokens": 13740544.0, "reward": 0.9982361793518066, "reward_std": 0.017334245145320892, "rewards/accuracy_reward_step": 0.14453125, "rewards/final_brier_reward_step": 0.8558472394943237, "rewards/format_reward_step": 0.99609375, "step": 57 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9004692044109106, "aux_distill/mean_u": 0.3421467995106455, "aux_distill/n_active_tok": 145.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5516298153635742, "calib/avg_num_step_conf": 4.5546875, "calib/ece": 0.14568627450980393, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006524960109414179, "calib/mean_conf": 0.017450980392156864, "calib/mu_c": 0.022926829268292686, "calib/mu_w": 0.016401869158878507, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001176470588235294, "calib/std_conf": 0.029786372542404516, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.302636815920398, "calib/step_q_c_n": 201.0, "calib/step_q_gap": -0.01488650014177817, "calib/step_q_w": 0.31752331606217615, "calib/step_q_w_n": 965.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1124.0, "completions/max_terminated_length": 1124.0, "completions/mean_length": 220.375, "completions/mean_terminated_length": 221.23922729492188, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.06186666666666667, "grad_norm": 0.010609041899442673, "learning_rate": 3.944444444444445e-06, "loss": 0.1015, "num_tokens": 13927088.0, "reward": 0.9991719722747803, "reward_std": 0.01902610808610916, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8420941829681396, "rewards/format_reward_step": 0.99609375, "step": 58 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8911302741616964, "aux_distill/mean_u": 0.33356761779633115, "aux_distill/n_active_tok": 145.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4503644939965694, "calib/avg_num_step_conf": 4.5625, "calib/ece": 0.1625390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002714408233276157, "calib/mean_conf": 0.0129296875, "calib/mu_c": 0.010681818181818183, "calib/mu_w": 0.01339622641509434, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001796875, "calib/std_conf": 0.0249767653260454, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2742857142857143, "calib/step_q_c_n": 182.0, "calib/step_q_gap": -0.03988264271225728, "calib/step_q_w": 0.3141683569979716, "calib/step_q_w_n": 986.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 543.0, "completions/max_terminated_length": 543.0, "completions/mean_length": 214.80078125, "completions/mean_terminated_length": 215.6431427001953, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.06293333333333333, "grad_norm": 0.008717049844563007, "learning_rate": 3.916666666666667e-06, "loss": 0.0844, "num_tokens": 14112133.0, "reward": 1.001440405845642, "reward_std": 0.004281938541680574, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.831005871295929, "rewards/format_reward_step": 1.0, "step": 59 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8966669980436563, "aux_distill/mean_u": 0.32837913568838606, "aux_distill/n_active_tok": 151.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5467383512544803, "calib/avg_num_step_conf": 4.76953125, "calib/ece": 0.111015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00045591397849462263, "calib/mean_conf": 0.010078125, "calib/mu_c": 0.009677419354838712, "calib/mu_w": 0.010133333333333334, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01484272200387702, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2873717948717949, "calib/step_q_c_n": 156.0, "calib/step_q_gap": -0.07487383893102206, "calib/step_q_w": 0.36224563380281694, "calib/step_q_w_n": 1065.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 612.0, "completions/max_terminated_length": 612.0, "completions/mean_length": 211.99609375, "completions/mean_terminated_length": 212.8274688720703, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.064, "grad_norm": 0.008605522103607655, "learning_rate": 3.88888888888889e-06, "loss": 0.0757, "num_tokens": 14299068.0, "reward": 1.0010108947753906, "reward_std": 0.0022478175815194845, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8809281587600708, "rewards/format_reward_step": 1.0, "step": 60 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9263600539416075, "aux_distill/mean_u": 0.31786048663733524, "aux_distill/n_active_tok": 124.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47050865800865793, "calib/avg_num_step_conf": 3.89453125, "calib/ece": 0.1665748031496063, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001725108225108224, "calib/mean_conf": 0.0066535433070866136, "calib/mu_c": 0.005227272727272728, "calib/mu_w": 0.006952380952380952, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.010320744306275885, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29112499999999997, "calib/step_q_c_n": 160.0, "calib/step_q_gap": -0.026999253285543645, "calib/step_q_w": 0.3181242532855436, "calib/step_q_w_n": 837.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 211.0234375, "completions/mean_terminated_length": 211.0234375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.06506666666666666, "grad_norm": 0.008169060572981834, "learning_rate": 3.861111111111112e-06, "loss": 0.1759, "num_tokens": 14480962.0, "reward": 0.9930111169815063, "reward_std": 0.024283651262521744, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8219597339630127, "rewards/format_reward_step": 0.9921875, "step": 61 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9069723356515169, "aux_distill/mean_u": 0.33958075347316097, "aux_distill/n_active_tok": 129.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5061648745519713, "calib/avg_num_step_conf": 4.06640625, "calib/ece": 0.11655078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00039756272401433694, "calib/mean_conf": 0.00454296875, "calib/mu_c": 0.004193548387096774, "calib/mu_w": 0.004591111111111111, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006994843900082648, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24023076923076922, "calib/step_q_c_n": 130.0, "calib/step_q_gap": -0.04175715612598155, "calib/step_q_w": 0.28198792535675077, "calib/step_q_w_n": 911.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 449.0, "completions/max_terminated_length": 449.0, "completions/mean_length": 193.44140625, "completions/mean_terminated_length": 194.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.06613333333333334, "grad_norm": 0.006717332173138857, "learning_rate": 3.833333333333334e-06, "loss": 0.0766, "num_tokens": 14661371.0, "reward": 1.0004730224609375, "reward_std": 0.001246248371899128, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.879852294921875, "rewards/format_reward_step": 1.0, "step": 62 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9175008162856102, "aux_distill/mean_u": 0.37723775517435953, "aux_distill/n_active_tok": 136.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5109437035528861, "calib/avg_num_step_conf": 4.2890625, "calib/ece": 0.17965234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006451185992059454, "calib/mean_conf": 0.0039414062500000005, "calib/mu_c": 0.004468085106382979, "calib/mu_w": 0.0038229665071770336, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.007311196757197889, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3179463687150838, "calib/step_q_c_n": 179.0, "calib/step_q_gap": 0.03303777241475736, "calib/step_q_w": 0.28490859630032644, "calib/step_q_w_n": 919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 523.0, "completions/max_terminated_length": 523.0, "completions/mean_length": 203.7265625, "completions/mean_terminated_length": 204.52549743652344, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.0672, "grad_norm": 0.008239316754043102, "learning_rate": 3.8055555555555556e-06, "loss": 0.0949, "num_tokens": 14845973.0, "reward": 0.989067018032074, "reward_std": 0.035046808421611786, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8062590956687927, "rewards/format_reward_step": 0.98828125, "step": 63 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.905761431902647, "aux_distill/mean_u": 0.3367796770974856, "aux_distill/n_active_tok": 133.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4940740740740741, "calib/avg_num_step_conf": 4.1796875, "calib/ece": 0.17380666666666666, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00015857142857142893, "calib/mean_conf": 0.002663921568627451, "calib/mu_c": 0.002533333333333333, "calib/mu_w": 0.002691904761904762, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005241571414739029, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3325, "calib/step_q_c_n": 168.0, "calib/step_q_gap": 0.05929600886917963, "calib/step_q_w": 0.2732039911308204, "calib/step_q_w_n": 902.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 531.0, "completions/max_terminated_length": 531.0, "completions/mean_length": 186.58203125, "completions/mean_terminated_length": 187.31373596191406, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.06826666666666667, "grad_norm": 0.006117277778685093, "learning_rate": 3.777777777777778e-06, "loss": 0.0806, "num_tokens": 15021322.0, "reward": 0.9965218901634216, "reward_std": 0.011855566874146461, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8211686611175537, "rewards/format_reward_step": 0.99609375, "step": 64 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9172791615128517, "aux_distill/mean_u": 0.3287449626680117, "aux_distill/n_active_tok": 129.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4908621651785714, "calib/avg_num_step_conf": 4.0703125, "calib/ece": 0.1229140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005982142857142857, "calib/mean_conf": 0.0020859375, "calib/mu_c": 0.0015625, "calib/mu_w": 0.0021607142857142858, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005229912976914028, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.35062162162162164, "calib/step_q_c_n": 111.0, "calib/step_q_gap": 0.07429509100937676, "calib/step_q_w": 0.2763265306122449, "calib/step_q_w_n": 931.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 476.0, "completions/max_terminated_length": 476.0, "completions/mean_length": 185.33984375, "completions/mean_terminated_length": 186.06668090820312, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.06933333333333333, "grad_norm": 0.005622010678052902, "learning_rate": 3.7500000000000005e-06, "loss": 0.0765, "num_tokens": 15197601.0, "reward": 0.992367148399353, "reward_std": 0.02259797602891922, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.867546796798706, "rewards/format_reward_step": 0.9921875, "step": 65 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9069628901779652, "aux_distill/mean_u": 0.3493697144402945, "aux_distill/n_active_tok": 134.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5360857432775241, "calib/avg_num_step_conf": 4.2265625, "calib/ece": 0.1396078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00043759512937595097, "calib/mean_conf": 0.0015686274509803923, "calib/mu_c": 0.0019444444444444442, "calib/mu_w": 0.0015068493150684932, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00467473351221509, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32437086092715234, "calib/step_q_c_n": 151.0, "calib/step_q_gap": 0.03487354621179256, "calib/step_q_w": 0.2894973147153598, "calib/step_q_w_n": 931.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 553.0, "completions/max_terminated_length": 553.0, "completions/mean_length": 196.44921875, "completions/mean_terminated_length": 197.21961975097656, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.0704, "grad_norm": 0.005124520510435104, "learning_rate": 3.7222222222222225e-06, "loss": 0.0888, "num_tokens": 15378052.0, "reward": 0.9963550567626953, "reward_std": 0.011599044315516949, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8559914231300354, "rewards/format_reward_step": 0.99609375, "step": 66 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8992666602134705, "aux_distill/mean_u": 0.34523840359572344, "aux_distill/n_active_tok": 141.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5186297465132852, "calib/avg_num_step_conf": 4.4453125, "calib/ece": 0.18244140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004388679629441104, "calib/mean_conf": 0.0011523437500000002, "calib/mu_c": 0.0015106382978723402, "calib/mu_w": 0.0010717703349282298, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003941495291362903, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30930314136125653, "calib/step_q_c_n": 191.0, "calib/step_q_gap": 0.02800430292408651, "calib/step_q_w": 0.28129883843717, "calib/step_q_w_n": 947.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 481.0, "completions/max_terminated_length": 481.0, "completions/mean_length": 196.73828125, "completions/mean_terminated_length": 197.5098114013672, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.07146666666666666, "grad_norm": 0.006514097563922405, "learning_rate": 3.694444444444445e-06, "loss": 0.0888, "num_tokens": 15557233.0, "reward": 0.9924563765525818, "reward_std": 0.022890709340572357, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8091315627098083, "rewards/format_reward_step": 0.9921875, "step": 67 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8872856739908457, "aux_distill/mean_u": 0.3291543344773491, "aux_distill/n_active_tok": 139.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5060248604769152, "calib/avg_num_step_conf": 4.34765625, "calib/ece": 0.1403450980392157, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006491628614916287, "calib/mean_conf": 0.0008313725490196078, "calib/mu_c": 0.001388888888888889, "calib/mu_w": 0.0007397260273972603, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003281408724031251, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2880645161290322, "calib/step_q_c_n": 124.0, "calib/step_q_gap": -0.001834371636387444, "calib/step_q_w": 0.28989888776541967, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2429.0, "completions/max_terminated_length": 2429.0, "completions/mean_length": 210.1015625, "completions/mean_terminated_length": 210.1015625, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.07253333333333334, "grad_norm": 0.0044059958308935165, "learning_rate": 3.6666666666666666e-06, "loss": 0.1246, "num_tokens": 15738915.0, "reward": 0.9923771619796753, "reward_std": 0.022651750594377518, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.851941704750061, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8799595925956964, "aux_distill/mean_u": 0.34479266047882573, "aux_distill/n_active_tok": 145.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48727272727272725, "calib/avg_num_step_conf": 4.5546875, "calib/ece": 0.13633607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00040266233766233757, "calib/mean_conf": 0.0009188235294117647, "calib/mu_c": 0.0005714285714285715, "calib/mu_w": 0.000974090909090909, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0033741866976827045, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32007142857142856, "calib/step_q_c_n": 140.0, "calib/step_q_gap": 0.012712754107490898, "calib/step_q_w": 0.30735867446393766, "calib/step_q_w_n": 1026.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1206.0, "completions/max_terminated_length": 1206.0, "completions/mean_length": 211.64453125, "completions/mean_terminated_length": 212.47451782226562, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.0736, "grad_norm": 0.003253097180277109, "learning_rate": 3.638888888888889e-06, "loss": 0.0896, "num_tokens": 15921400.0, "reward": 0.9961657524108887, "reward_std": 0.011282737366855145, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8595190048217773, "rewards/format_reward_step": 0.99609375, "step": 69 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8753404449671507, "aux_distill/mean_u": 0.33883971306083144, "aux_distill/n_active_tok": 152.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4919236417033774, "calib/avg_num_step_conf": 4.78515625, "calib/ece": 0.10570472440944882, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00025077500407896894, "calib/mean_conf": 0.000594488188976378, "calib/mu_c": 0.00037037037037037035, "calib/mu_w": 0.0006211453744493393, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.002813936270862344, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.22784955752212388, "calib/step_q_c_n": 113.0, "calib/step_q_gap": -0.08005421945629337, "calib/step_q_w": 0.30790377697841725, "calib/step_q_w_n": 1112.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 787.0, "completions/max_terminated_length": 787.0, "completions/mean_length": 208.73828125, "completions/mean_terminated_length": 209.55686950683594, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.07466666666666667, "grad_norm": 0.004173743538558483, "learning_rate": 3.6111111111111115e-06, "loss": 0.0841, "num_tokens": 16105637.0, "reward": 0.9883161783218384, "reward_std": 0.03326448053121567, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.8828824162483215, "rewards/format_reward_step": 0.98828125, "step": 70 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8868246395140886, "aux_distill/mean_u": 0.3468969045243898, "aux_distill/n_active_tok": 146.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4878048780487805, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.19591764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002, "calib/mean_conf": 0.0001607843137254902, "calib/mu_c": 0.0, "calib/mu_w": 0.0002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001243662906123547, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3097014925373134, "calib/step_q_c_n": 201.0, "calib/step_q_gap": 0.023662397887107667, "calib/step_q_w": 0.2860390946502057, "calib/step_q_w_n": 972.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 592.0, "completions/max_terminated_length": 592.0, "completions/mean_length": 208.421875, "completions/mean_terminated_length": 209.23922729492188, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.07573333333333333, "grad_norm": 0.002800512593239546, "learning_rate": 3.5833333333333335e-06, "loss": 0.0817, "num_tokens": 16287209.0, "reward": 0.9960929751396179, "reward_std": 0.011049911379814148, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8007797002792358, "rewards/format_reward_step": 0.99609375, "step": 71 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8944362998008728, "aux_distill/mean_u": 0.37095525634503335, "aux_distill/n_active_tok": 160.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.528703180877094, "calib/avg_num_step_conf": 5.078125, "calib/ece": 0.09025196850393702, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006271409749670619, "calib/mean_conf": 0.00029921259842519685, "calib/mu_c": 0.0008695652173913044, "calib/mu_w": 0.00024242424242424242, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0018774038056023267, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3302631578947368, "calib/step_q_c_n": 114.0, "calib/step_q_gap": 0.05938626076151593, "calib/step_q_w": 0.2708768971332209, "calib/step_q_w_n": 1186.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 555.0, "completions/max_terminated_length": 555.0, "completions/mean_length": 207.21484375, "completions/mean_terminated_length": 208.0274658203125, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.0768, "grad_norm": 0.0037375171668827534, "learning_rate": 3.555555555555556e-06, "loss": 0.0848, "num_tokens": 16468472.0, "reward": 0.9922637939453125, "reward_std": 0.022244982421398163, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.9024963974952698, "rewards/format_reward_step": 0.9921875, "step": 72 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8917406033724546, "aux_distill/mean_u": 0.2963726889406619, "aux_distill/n_active_tok": 146.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49830397912589686, "calib/avg_num_step_conf": 4.6015625, "calib/ece": 0.13739763779527558, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0001298108284409654, "calib/mean_conf": 0.00039763779527559055, "calib/mu_c": 0.00028571428571428574, "calib/mu_w": 0.00041552511415525115, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002916717078236597, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.36020689655172416, "calib/step_q_c_n": 145.0, "calib/step_q_gap": 0.045115705845044596, "calib/step_q_w": 0.31509119070667957, "calib/step_q_w_n": 1033.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 562.0, "completions/max_terminated_length": 562.0, "completions/mean_length": 203.85546875, "completions/mean_terminated_length": 204.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.07786666666666667, "grad_norm": 0.0038799333851784468, "learning_rate": 3.5277777777777784e-06, "loss": 0.0907, "num_tokens": 16651499.0, "reward": 0.9922223091125488, "reward_std": 0.022208530455827713, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8555382490158081, "rewards/format_reward_step": 0.9921875, "step": 73 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.893372992053628, "aux_distill/mean_u": 0.33364173770817035, "aux_distill/n_active_tok": 148.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4790697674418605, "calib/avg_num_step_conf": 4.703125, "calib/ece": 0.15624666666666667, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007306976744186046, "calib/mean_conf": 0.0006160784313725491, "calib/mu_c": 0.0, "calib/mu_w": 0.0007306976744186046, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005473724787808759, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2705263157894737, "calib/step_q_c_n": 171.0, "calib/step_q_gap": -0.02696138992204611, "calib/step_q_w": 0.29748770571151983, "calib/step_q_w_n": 1033.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 514.0, "completions/max_terminated_length": 514.0, "completions/mean_length": 198.390625, "completions/mean_terminated_length": 199.16864013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.07893333333333333, "grad_norm": 0.004464035853743553, "learning_rate": 3.5e-06, "loss": 0.1062, "num_tokens": 16830023.0, "reward": 0.992172360420227, "reward_std": 0.022139202803373337, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8359072804450989, "rewards/format_reward_step": 0.9921875, "step": 74 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8780819457024336, "aux_distill/mean_u": 0.308437020329091, "aux_distill/n_active_tok": 152.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49765258215962443, "calib/avg_num_step_conf": 4.76953125, "calib/ece": 0.16466666666666666, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -4.694835680751174e-05, "calib/mean_conf": 3.9215686274509805e-05, "calib/mu_c": 0.0, "calib/mu_w": 4.694835680751174e-05, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0006249951941376166, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3568253968253968, "calib/step_q_c_n": 189.0, "calib/step_q_gap": 0.05972268364710226, "calib/step_q_w": 0.29710271317829456, "calib/step_q_w_n": 1032.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 442.0, "completions/max_terminated_length": 442.0, "completions/mean_length": 204.1484375, "completions/mean_terminated_length": 204.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.08, "grad_norm": 0.003264680504798889, "learning_rate": 3.4722222222222224e-06, "loss": 0.0864, "num_tokens": 17010845.0, "reward": 0.9960935711860657, "reward_std": 0.01104909647256136, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8320308923721313, "rewards/format_reward_step": 0.99609375, "step": 75 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8988117761909962, "aux_distill/mean_u": 0.3584846229869654, "aux_distill/n_active_tok": 158.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49528301886792453, "calib/avg_num_step_conf": 4.953125, "calib/ece": 0.16854901960784316, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.433962264150943e-05, "calib/mean_conf": 7.843137254901961e-05, "calib/mu_c": 0.0, "calib/mu_w": 9.433962264150943e-05, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0008821350493491759, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3034035897435898, "calib/step_q_c_n": 195.0, "calib/step_q_gap": 0.03379501565225712, "calib/step_q_w": 0.26960857409133265, "calib/step_q_w_n": 1073.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 530.0, "completions/max_terminated_length": 530.0, "completions/mean_length": 208.640625, "completions/mean_terminated_length": 209.45883178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.08106666666666666, "grad_norm": 0.003597772680222988, "learning_rate": 3.444444444444445e-06, "loss": 0.09, "num_tokens": 17191121.0, "reward": 0.9941402673721313, "reward_std": 0.016573920845985413, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8242179751396179, "rewards/format_reward_step": 0.9921875, "step": 76 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8938879240304232, "aux_distill/mean_u": 0.3483319028167608, "aux_distill/n_active_tok": 160.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49333333333333335, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.12101171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.333333333333334e-05, "calib/mean_conf": 8.203125e-05, "calib/mu_c": 0.0, "calib/mu_w": 9.333333333333334e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0008822851715989779, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.35564285714285715, "calib/step_q_c_n": 140.0, "calib/step_q_gap": 0.04764460692413447, "calib/step_q_w": 0.3079982502187227, "calib/step_q_w_n": 1143.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 432.0, "completions/max_terminated_length": 432.0, "completions/mean_length": 203.703125, "completions/mean_terminated_length": 204.50196838378906, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.08213333333333334, "grad_norm": 0.0016254527727141976, "learning_rate": 3.416666666666667e-06, "loss": 0.0897, "num_tokens": 17371741.0, "reward": 0.9999996423721313, "reward_std": 1.1108706985396566e-06, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8789054751396179, "rewards/format_reward_step": 1.0, "step": 77 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8695187773555517, "aux_distill/mean_u": 0.3649508815295921, "aux_distill/n_active_tok": 184.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5129910714285715, "calib/avg_num_step_conf": 5.77734375, "calib/ece": 0.2186171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003557142857142857, "calib/mean_conf": 0.0001328125, "calib/mu_c": 0.0004107142857142857, "calib/mu_w": 5.4999999999999995e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0011948528527997704, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32073717948717945, "calib/step_q_c_n": 312.0, "calib/step_q_gap": 0.033710615648276265, "calib/step_q_w": 0.2870265638389032, "calib/step_q_w_n": 1167.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 679.0, "completions/max_terminated_length": 679.0, "completions/mean_length": 235.140625, "completions/mean_terminated_length": 236.06275939941406, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.0832, "grad_norm": 0.003223991021513939, "learning_rate": 3.3888888888888893e-06, "loss": 0.0852, "num_tokens": 17563769.0, "reward": 1.000089168548584, "reward_std": 0.0002531877835281193, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.7814282178878784, "rewards/format_reward_step": 1.0, "step": 78 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8652449604123831, "aux_distill/mean_u": 0.3407855392361494, "aux_distill/n_active_tok": 176.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4996230073244291, "calib/avg_num_step_conf": 5.54296875, "calib/ece": 0.1723729411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 6.186988367083154e-05, "calib/mean_conf": 0.000176078431372549, "calib/mu_c": 0.00022727272727272727, "calib/mu_w": 0.00016540284360189574, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0012663117331894332, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.33054794520547953, "calib/step_q_c_n": 219.0, "calib/step_q_gap": 0.027218778538812882, "calib/step_q_w": 0.30332916666666665, "calib/step_q_w_n": 1200.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 629.0, "completions/max_terminated_length": 629.0, "completions/mean_length": 228.3046875, "completions/mean_terminated_length": 229.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.08426666666666667, "grad_norm": 0.0034459615126252174, "learning_rate": 3.3611111111111117e-06, "loss": 0.0825, "num_tokens": 17752399.0, "reward": 0.9922257661819458, "reward_std": 0.022208131849765778, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8203890323638916, "rewards/format_reward_step": 0.9921875, "step": 79 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9012061804533005, "aux_distill/mean_u": 0.3593384027984835, "aux_distill/n_active_tok": 172.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49230769230769234, "calib/avg_num_step_conf": 5.74609375, "calib/ece": 0.22915573122529645, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00012102564102564102, "calib/mean_conf": 9.328063241106719e-05, "calib/mu_c": 0.0, "calib/mu_w": 0.00012102564102564102, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0009022557963437884, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24938356164383566, "calib/step_q_c_n": 292.0, "calib/step_q_gap": -0.06631151893292433, "calib/step_q_w": 0.31569508057676, "calib/step_q_w_n": 1179.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 554.0, "completions/max_terminated_length": 554.0, "completions/mean_length": 227.3203125, "completions/mean_terminated_length": 228.21177673339844, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.08533333333333333, "grad_norm": 0.002913886681199074, "learning_rate": 3.3333333333333333e-06, "loss": 0.0709, "num_tokens": 17936561.0, "reward": 0.9882808327674866, "reward_std": 0.033146779984235764, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7617179155349731, "rewards/format_reward_step": 0.98828125, "step": 80 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8805477563291788, "aux_distill/mean_u": 0.3763560604832592, "aux_distill/n_active_tok": 201.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4953051643192488, "calib/avg_num_step_conf": 6.3203125, "calib/ece": 0.167890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.389671361502347e-05, "calib/mean_conf": 7.8125e-05, "calib/mu_c": 0.0, "calib/mu_w": 9.389671361502347e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0008804240366863005, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3075, "calib/step_q_c_n": 260.0, "calib/step_q_gap": 0.007315169366715779, "calib/step_q_w": 0.3001848306332842, "calib/step_q_w_n": 1358.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 822.0, "completions/max_terminated_length": 822.0, "completions/mean_length": 253.32421875, "completions/mean_terminated_length": 254.31765747070312, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.0864, "grad_norm": 0.0014355859020724893, "learning_rate": 3.3055555555555558e-06, "loss": 0.0871, "num_tokens": 18131468.0, "reward": 0.9999996423721313, "reward_std": 1.1056023367927992e-06, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8320304751396179, "rewards/format_reward_step": 1.0, "step": 81 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8585575744509697, "aux_distill/mean_u": 0.3321043982053774, "aux_distill/n_active_tok": 180.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5007708911501696, "calib/avg_num_step_conf": 5.671875, "calib/ece": 0.18479409448818898, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002748586699558023, "calib/mean_conf": 0.00024527559055118113, "calib/mu_c": 2.1276595744680852e-05, "calib/mu_w": 0.00029613526570048315, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0026527428463929965, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3399958333333333, "calib/step_q_c_n": 240.0, "calib/step_q_gap": 0.04995127887788775, "calib/step_q_w": 0.2900445544554455, "calib/step_q_w_n": 1212.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 508.0, "completions/max_terminated_length": 508.0, "completions/mean_length": 239.60546875, "completions/mean_terminated_length": 240.54510498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.08746666666666666, "grad_norm": 0.003587129293009639, "learning_rate": 3.277777777777778e-06, "loss": 0.0863, "num_tokens": 18322167.0, "reward": 0.9921879172325134, "reward_std": 0.022108623757958412, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8085945248603821, "rewards/format_reward_step": 0.9921875, "step": 82 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8511873986572027, "aux_distill/mean_u": 0.33659491248510026, "aux_distill/n_active_tok": 194.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49765258215962443, "calib/avg_num_step_conf": 6.359375, "calib/ece": 0.16469411764705882, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -1.4084507042253522e-05, "calib/mean_conf": 1.1764705882352942e-05, "calib/mu_c": 0.0, "calib/mu_w": 1.4084507042253522e-05, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.000187498558241285, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3059545454545455, "calib/step_q_c_n": 220.0, "calib/step_q_gap": 0.007154829545454544, "calib/step_q_w": 0.29879971590909093, "calib/step_q_w_n": 1408.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 654.0, "completions/max_terminated_length": 654.0, "completions/mean_length": 251.76171875, "completions/mean_terminated_length": 252.74903869628906, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.08853333333333334, "grad_norm": 0.0016355804400518537, "learning_rate": 3.2500000000000002e-06, "loss": 0.0747, "num_tokens": 18517690.0, "reward": 0.99609375, "reward_std": 0.011048593558371067, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.83203125, "rewards/format_reward_step": 0.99609375, "step": 83 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9107771255075932, "aux_distill/mean_u": 0.38344631357969455, "aux_distill/n_active_tok": 192.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5032362459546925, "calib/avg_num_step_conf": 6.0078125, "calib/ece": 0.1888740157480315, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00013066343042071198, "calib/mean_conf": 0.00010236220472440946, "calib/mu_c": 0.00020833333333333335, "calib/mu_w": 7.766990291262136e-05, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0009377023827267995, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2435294117647059, "calib/step_q_c_n": 323.0, "calib/step_q_gap": -0.08156408617768093, "calib/step_q_w": 0.32509349794238684, "calib/step_q_w_n": 1215.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1439.0, "completions/max_terminated_length": 1439.0, "completions/mean_length": 246.78515625, "completions/mean_terminated_length": 246.78515625, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.0896, "grad_norm": 0.003886830760166049, "learning_rate": 3.2222222222222227e-06, "loss": 0.1125, "num_tokens": 18710595.0, "reward": 0.9922261238098145, "reward_std": 0.02220771461725235, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8047647476196289, "rewards/format_reward_step": 0.9921875, "step": 84 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8618598934262991, "aux_distill/mean_u": 0.33070176505424564, "aux_distill/n_active_tok": 199.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5092694164107064, "calib/avg_num_step_conf": 6.2265625, "calib/ece": 0.16854901960784316, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00018538832821412903, "calib/mean_conf": 7.843137254901961e-05, "calib/mu_c": 0.00023255813953488373, "calib/mu_w": 4.7169811320754715e-05, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.000882135049349176, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2995575221238938, "calib/step_q_c_n": 226.0, "calib/step_q_gap": -0.04049028489365003, "calib/step_q_w": 0.34004780701754383, "calib/step_q_w_n": 1368.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 626.0, "completions/max_terminated_length": 626.0, "completions/mean_length": 251.81640625, "completions/mean_terminated_length": 252.80393981933594, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.09066666666666667, "grad_norm": 0.002688215347006917, "learning_rate": 3.1944444444444443e-06, "loss": 0.0684, "num_tokens": 18906692.0, "reward": 0.9961324334144592, "reward_std": 0.011159027926623821, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8282023668289185, "rewards/format_reward_step": 0.99609375, "step": 85 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8568615298718214, "aux_distill/mean_u": 0.35021126297831856, "aux_distill/n_active_tok": 209.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49980776624375234, "calib/avg_num_step_conf": 6.5546875, "calib/ece": 0.19983921568627452, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0001764705882352941, "calib/mean_conf": 0.00016078431372549016, "calib/mu_c": 1.9607843137254903e-05, "calib/mu_w": 0.000196078431372549, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001243662906123547, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3664554545454546, "calib/step_q_c_n": 330.0, "calib/step_q_gap": 0.06424432694901538, "calib/step_q_w": 0.3022111275964392, "calib/step_q_w_n": 1348.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 664.0, "completions/max_terminated_length": 664.0, "completions/mean_length": 257.46484375, "completions/mean_terminated_length": 258.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.09173333333333333, "grad_norm": 0.0028597498312592506, "learning_rate": 3.1666666666666667e-06, "loss": 0.074, "num_tokens": 19101923.0, "reward": 0.9960969090461731, "reward_std": 0.01106179878115654, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.7968812584877014, "rewards/format_reward_step": 0.99609375, "step": 86 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.883620098233223, "aux_distill/mean_u": 0.3155454990718053, "aux_distill/n_active_tok": 179.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5023886639676113, "calib/avg_num_step_conf": 5.6328125, "calib/ece": 0.25470588235294117, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -5.6680161943319844e-05, "calib/mean_conf": 0.00019607843137254904, "calib/mu_c": 0.00015384615384615385, "calib/mu_w": 0.0002105263157894737, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0020676747178767167, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31411209439528026, "calib/step_q_c_n": 339.0, "calib/step_q_gap": -0.0005639708812383293, "calib/step_q_w": 0.3146760652765186, "calib/step_q_w_n": 1103.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 582.0, "completions/max_terminated_length": 582.0, "completions/mean_length": 238.671875, "completions/mean_terminated_length": 239.6078643798828, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.0928, "grad_norm": 0.0026609438937157393, "learning_rate": 3.138888888888889e-06, "loss": 0.0891, "num_tokens": 19292327.0, "reward": 0.9961307048797607, "reward_std": 0.011164000257849693, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7422612905502319, "rewards/format_reward_step": 0.99609375, "step": 87 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8428022786974907, "aux_distill/mean_u": 0.33032704923695255, "aux_distill/n_active_tok": 211.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49774774774774777, "calib/avg_num_step_conf": 6.7109375, "calib/ece": 0.132796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -1.801801801801802e-05, "calib/mean_conf": 1.5625e-05, "calib/mu_c": 0.0, "calib/mu_w": 1.801801801801802e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0002495112409792393, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.38029126213592235, "calib/step_q_c_n": 206.0, "calib/step_q_gap": 0.04931903991370007, "calib/step_q_w": 0.3309722222222223, "calib/step_q_w_n": 1512.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 924.0, "completions/max_terminated_length": 924.0, "completions/mean_length": 270.84765625, "completions/mean_terminated_length": 271.9098205566406, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.09386666666666667, "grad_norm": 0.0018712286837399006, "learning_rate": 3.1111111111111116e-06, "loss": 0.0842, "num_tokens": 19495320.0, "reward": 1.0, "reward_std": 8.843431942295865e-08, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8671874403953552, "rewards/format_reward_step": 1.0, "step": 88 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8876792825758457, "aux_distill/mean_u": 0.3874043072057041, "aux_distill/n_active_tok": 203.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49770642201834864, "calib/avg_num_step_conf": 6.3671875, "calib/ece": 0.1483984375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -4.587155963302752e-05, "calib/mean_conf": 3.90625e-05, "calib/mu_c": 0.0, "calib/mu_w": 4.587155963302752e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.000623778102448098, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30855855855855857, "calib/step_q_c_n": 222.0, "calib/step_q_gap": -0.03795564598689599, "calib/step_q_w": 0.34651420454545456, "calib/step_q_w_n": 1408.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 622.0, "completions/max_terminated_length": 622.0, "completions/mean_length": 261.14453125, "completions/mean_terminated_length": 262.16864013671875, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.09493333333333333, "grad_norm": 0.002203390235081315, "learning_rate": 3.0833333333333336e-06, "loss": 0.0853, "num_tokens": 19694869.0, "reward": 0.9960935711860657, "reward_std": 0.01104909647256136, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8476558923721313, "rewards/format_reward_step": 0.99609375, "step": 89 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8904574010521173, "aux_distill/mean_u": 0.38580885479020766, "aux_distill/n_active_tok": 204.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5131578947368421, "calib/avg_num_step_conf": 6.42578125, "calib/ece": 0.1483984375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002631578947368421, "calib/mean_conf": 3.90625e-05, "calib/mu_c": 0.0002631578947368421, "calib/mu_w": 0.0, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.000623778102448098, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.339378947368421, "calib/step_q_c_n": 209.0, "calib/step_q_gap": -0.00023860137809711413, "calib/step_q_w": 0.3396175487465181, "calib/step_q_w_n": 1436.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 945.0, "completions/max_terminated_length": 945.0, "completions/mean_length": 265.22265625, "completions/mean_terminated_length": 266.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.096, "grad_norm": 0.0022498685866594315, "learning_rate": 3.055555555555556e-06, "loss": 0.0823, "num_tokens": 19889894.0, "reward": 1.0000388622283936, "reward_std": 0.00010993177420459688, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8516402244567871, "rewards/format_reward_step": 1.0, "step": 90 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8649949803948402, "aux_distill/mean_u": 0.33981641652048383, "aux_distill/n_active_tok": 213.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5069330712959931, "calib/avg_num_step_conf": 6.69921875, "calib/ece": 0.1678515625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00013866142591986025, "calib/mean_conf": 0.0001171875, "calib/mu_c": 0.00023255813953488373, "calib/mu_w": 9.389671361502347e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0010761701026528055, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3283916083916084, "calib/step_q_c_n": 286.0, "calib/step_q_gap": -0.01630048398067996, "calib/step_q_w": 0.3446920923722884, "calib/step_q_w_n": 1429.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 625.0, "completions/max_terminated_length": 625.0, "completions/mean_length": 267.734375, "completions/mean_terminated_length": 268.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.09706666666666666, "grad_norm": 0.0024775159545242786, "learning_rate": 3.0277777777777776e-06, "loss": 0.0816, "num_tokens": 20089954.0, "reward": 1.000038504600525, "reward_std": 0.00011103737051598728, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.832108199596405, "rewards/format_reward_step": 1.0, "step": 91 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8489798828959465, "aux_distill/mean_u": 0.3153927456116345, "aux_distill/n_active_tok": 210.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49324324324324326, "calib/avg_num_step_conf": 6.58984375, "calib/ece": 0.1224110671936759, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00013513513513513514, "calib/mean_conf": 0.00011857707509881423, "calib/mu_c": 0.0, "calib/mu_w": 0.00013513513513513514, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0010824556472434112, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.297, "calib/step_q_c_n": 190.0, "calib/step_q_gap": -0.021366065464261896, "calib/step_q_w": 0.3183660654642619, "calib/step_q_w_n": 1497.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 765.0, "completions/max_terminated_length": 765.0, "completions/mean_length": 269.25390625, "completions/mean_terminated_length": 270.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.09813333333333334, "grad_norm": 0.002843436785042286, "learning_rate": 3e-06, "loss": 0.0741, "num_tokens": 20289411.0, "reward": 0.9882806539535522, "reward_std": 0.033146657049655914, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8671863079071045, "rewards/format_reward_step": 0.98828125, "step": 92 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8637429755181074, "aux_distill/mean_u": 0.3244587877849719, "aux_distill/n_active_tok": 224.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4953271028037383, "calib/avg_num_step_conf": 7.0625, "calib/ece": 0.1574015748031496, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.345794392523364e-05, "calib/mean_conf": 7.874015748031496e-05, "calib/mu_c": 0.0, "calib/mu_w": 9.345794392523364e-05, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0008838560756158915, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3286440677966102, "calib/step_q_c_n": 236.0, "calib/step_q_gap": -9.639021865692765e-05, "calib/step_q_w": 0.32874045801526713, "calib/step_q_w_n": 1572.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 968.0, "completions/max_terminated_length": 968.0, "completions/mean_length": 285.59765625, "completions/mean_terminated_length": 286.7176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.0992, "grad_norm": 0.0027552025858312845, "learning_rate": 2.9722222222222225e-06, "loss": 0.0923, "num_tokens": 20492108.0, "reward": 0.9921871423721313, "reward_std": 0.02209819294512272, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8359367251396179, "rewards/format_reward_step": 0.9921875, "step": 93 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.862243564799428, "aux_distill/mean_u": 0.30363283298666166, "aux_distill/n_active_tok": 213.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4951923076923077, "calib/avg_num_step_conf": 6.7734375, "calib/ece": 0.18742578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.134615384615385e-05, "calib/mean_conf": 7.421875e-05, "calib/mu_c": 0.0, "calib/mu_w": 9.134615384615385e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0008375695954059205, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2911636363636364, "calib/step_q_c_n": 275.0, "calib/step_q_gap": -0.06487872141566448, "calib/step_q_w": 0.3560423577793009, "calib/step_q_w_n": 1459.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1027.0, "completions/max_terminated_length": 1027.0, "completions/mean_length": 270.23828125, "completions/mean_terminated_length": 271.2980651855469, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.10026666666666667, "grad_norm": 0.0026547429151833057, "learning_rate": 2.944444444444445e-06, "loss": 0.086, "num_tokens": 20693777.0, "reward": 0.9999996423721313, "reward_std": 1.0006114052885096e-06, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8124992847442627, "rewards/format_reward_step": 1.0, "step": 94 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.871882077306509, "aux_distill/mean_u": 0.3612003757729328, "aux_distill/n_active_tok": 210.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5216162287994679, "calib/avg_num_step_conf": 6.58984375, "calib/ece": 0.24203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00043232457598935814, "calib/mean_conf": 0.00015625, "calib/mu_c": 0.0004838709677419355, "calib/mu_w": 5.154639175257732e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0012401959270615269, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.38400000000000006, "calib/step_q_c_n": 310.0, "calib/step_q_gap": 0.05170951343500374, "calib/step_q_w": 0.3322904865649963, "calib/step_q_w_n": 1377.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 858.0, "completions/max_terminated_length": 858.0, "completions/mean_length": 271.01171875, "completions/mean_terminated_length": 272.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.10133333333333333, "grad_norm": 0.0028460733592510223, "learning_rate": 2.916666666666667e-06, "loss": 0.0904, "num_tokens": 20893092.0, "reward": 1.0001163482666016, "reward_std": 0.00033034812076948583, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7580453157424927, "rewards/format_reward_step": 1.0, "step": 95 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8325482532382011, "aux_distill/mean_u": 0.35870209660360397, "aux_distill/n_active_tok": 222.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5041836515769148, "calib/avg_num_step_conf": 6.95703125, "calib/ece": 0.3083984375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 8.367303153829653e-05, "calib/mean_conf": 0.0001953125, "calib/mu_c": 0.00025316455696202533, "calib/mu_w": 0.0001694915254237288, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0013838273112436214, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3241626506024096, "calib/step_q_c_n": 498.0, "calib/step_q_gap": -0.007832283146616104, "calib/step_q_w": 0.3319949337490257, "calib/step_q_w_n": 1283.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 749.0, "completions/max_terminated_length": 749.0, "completions/mean_length": 270.953125, "completions/mean_terminated_length": 272.0157165527344, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.1024, "grad_norm": 0.00353445066139102, "learning_rate": 2.888888888888889e-06, "loss": 0.0788, "num_tokens": 21092080.0, "reward": 1.0000771284103394, "reward_std": 0.00022152194287627935, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.6915605068206787, "rewards/format_reward_step": 1.0, "step": 96 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8440025225281715, "aux_distill/mean_u": 0.35295863647655806, "aux_distill/n_active_tok": 232.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49776785714285715, "calib/avg_num_step_conf": 7.3046875, "calib/ece": 0.1249609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -4.464285714285714e-05, "calib/mean_conf": 3.90625e-05, "calib/mu_c": 0.0, "calib/mu_w": 4.464285714285714e-05, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.000623778102448098, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3241626794258373, "calib/step_q_c_n": 209.0, "calib/step_q_gap": -0.011048097214740704, "calib/step_q_w": 0.335210776640578, "calib/step_q_w_n": 1661.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 802.0, "completions/max_terminated_length": 802.0, "completions/mean_length": 280.48046875, "completions/mean_terminated_length": 281.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.10346666666666667, "grad_norm": 0.0012264709221199155, "learning_rate": 2.861111111111111e-06, "loss": 0.0844, "num_tokens": 21292763.0, "reward": 0.9999998211860657, "reward_std": 5.528011683963996e-07, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.8749996423721313, "rewards/format_reward_step": 1.0, "step": 97 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8470690567046404, "aux_distill/mean_u": 0.3851828100309297, "aux_distill/n_active_tok": 223.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49780997304582203, "calib/avg_num_step_conf": 6.97265625, "calib/ece": 0.16510236220472443, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -1.6621743036837383e-05, "calib/mean_conf": 0.0002519685039370079, "calib/mu_c": 0.0002380952380952381, "calib/mu_w": 0.0002547169811320755, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0015367851910457256, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2570394265232975, "calib/step_q_c_n": 279.0, "calib/step_q_gap": -0.05668029459224033, "calib/step_q_w": 0.3137197211155378, "calib/step_q_w_n": 1506.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1559.0, "completions/max_terminated_length": 1559.0, "completions/mean_length": 283.00390625, "completions/mean_terminated_length": 283.00390625, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.10453333333333334, "grad_norm": 0.0035589830949902534, "learning_rate": 2.8333333333333335e-06, "loss": 0.1186, "num_tokens": 21495204.0, "reward": 0.9922254085540771, "reward_std": 0.02220987156033516, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8282006978988647, "rewards/format_reward_step": 0.9921875, "step": 98 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8080371990799904, "aux_distill/mean_u": 0.3410837156640096, "aux_distill/n_active_tok": 241.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4928571428571429, "calib/avg_num_step_conf": 7.6328125, "calib/ece": 0.17953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00019047619047619048, "calib/mean_conf": 0.00015625, "calib/mu_c": 0.0, "calib/mu_w": 0.00019047619047619048, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0015229366163764003, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28961538461538466, "calib/step_q_c_n": 286.0, "calib/step_q_gap": -0.0284649511160302, "calib/step_q_w": 0.31808033573141486, "calib/step_q_w_n": 1668.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 875.0, "completions/max_terminated_length": 875.0, "completions/mean_length": 286.265625, "completions/mean_terminated_length": 287.38824462890625, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.1056, "grad_norm": 0.0018437359249219298, "learning_rate": 2.805555555555556e-06, "loss": 0.0813, "num_tokens": 21698096.0, "reward": 0.9999988079071045, "reward_std": 3.3156775316456333e-06, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.820310115814209, "rewards/format_reward_step": 1.0, "step": 99 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8550351019948721, "aux_distill/mean_u": 0.38581492120683264, "aux_distill/n_active_tok": 234.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5162135604323617, "calib/avg_num_step_conf": 7.46484375, "calib/ece": 0.1677734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00032427120864723225, "calib/mean_conf": 0.0001953125, "calib/mu_c": 0.00046511627906976747, "calib/mu_w": 0.00014084507042253522, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0013838273112436216, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3404095563139932, "calib/step_q_c_n": 293.0, "calib/step_q_gap": 0.015230322692237952, "calib/step_q_w": 0.32517923362175527, "calib/step_q_w_n": 1618.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 886.0, "completions/max_terminated_length": 886.0, "completions/mean_length": 282.94921875, "completions/mean_terminated_length": 284.058837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.10666666666666667, "grad_norm": 0.0025342495646327734, "learning_rate": 2.7777777777777783e-06, "loss": 0.092, "num_tokens": 21901747.0, "reward": 1.000077247619629, "reward_std": 0.00022152194287627935, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8321855068206787, "rewards/format_reward_step": 1.0, "step": 100 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8608838450163603, "aux_distill/mean_u": 0.33259501798987035, "aux_distill/n_active_tok": 235.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5026641705069124, "calib/avg_num_step_conf": 7.41015625, "calib/ece": 0.12125490196078431, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 1.0080645161290316e-05, "calib/mean_conf": 0.00031372549019607844, "calib/mu_c": 0.0003225806451612903, "calib/mu_w": 0.0003125, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0019552864097753587, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.39691919191919184, "calib/step_q_c_n": 198.0, "calib/step_q_gap": 0.061659980618426646, "calib/step_q_w": 0.3352592113007652, "calib/step_q_w_n": 1699.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 832.0, "completions/max_terminated_length": 832.0, "completions/mean_length": 292.34765625, "completions/mean_terminated_length": 293.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.10773333333333333, "grad_norm": 0.0023431070148944855, "learning_rate": 2.7500000000000004e-06, "loss": 0.0809, "num_tokens": 22107388.0, "reward": 0.9961308240890503, "reward_std": 0.01106975693255663, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8750742077827454, "rewards/format_reward_step": 0.99609375, "step": 101 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8173305280506611, "aux_distill/mean_u": 0.3504695026670667, "aux_distill/n_active_tok": 244.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49953297216514103, "calib/avg_num_step_conf": 7.64453125, "calib/ece": 0.20764705882352943, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.340556697179174e-06, "calib/mean_conf": 0.00019607843137254904, "calib/mu_c": 0.00018867924528301886, "calib/mu_w": 0.00019801980198019803, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001386483884679505, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33429508196721314, "calib/step_q_c_n": 305.0, "calib/step_q_gap": 0.04493951295994919, "calib/step_q_w": 0.28935556900726395, "calib/step_q_w_n": 1652.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2730.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 290.765625, "completions/mean_terminated_length": 290.765625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.1088, "grad_norm": 0.0022615944035351276, "learning_rate": 2.7222222222222224e-06, "loss": 0.1134, "num_tokens": 22312328.0, "reward": 0.9961318373680115, "reward_std": 0.011160054244101048, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.789138674736023, "rewards/format_reward_step": 0.99609375, "step": 102 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8737247381359339, "aux_distill/mean_u": 0.36661455320470204, "aux_distill/n_active_tok": 225.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5039115646258504, "calib/avg_num_step_conf": 7.1328125, "calib/ece": 0.23409765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 7.312925170068022e-05, "calib/mean_conf": 0.00027734375, "calib/mu_c": 0.0003333333333333333, "calib/mu_w": 0.0002602040816326531, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0018554276311233315, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.41829850746268654, "calib/step_q_c_n": 335.0, "calib/step_q_gap": 0.0982855631300239, "calib/step_q_w": 0.32001294433266264, "calib/step_q_w_n": 1491.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 943.0, "completions/max_terminated_length": 943.0, "completions/mean_length": 295.15625, "completions/mean_terminated_length": 296.3137512207031, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.10986666666666667, "grad_norm": 0.0028516261372715235, "learning_rate": 2.6944444444444444e-06, "loss": 0.0924, "num_tokens": 22516248.0, "reward": 0.9961703419685364, "reward_std": 0.011269855313003063, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.761871874332428, "rewards/format_reward_step": 0.99609375, "step": 103 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8354058209806681, "aux_distill/mean_u": 0.3212144089934268, "aux_distill/n_active_tok": 226.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4834905660377358, "calib/avg_num_step_conf": 7.1171875, "calib/ece": 0.1716015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00033018867924528304, "calib/mean_conf": 0.0002734375, "calib/mu_c": 0.0, "calib/mu_w": 0.00033018867924528304, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0016308301363396958, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3244781144781145, "calib/step_q_c_n": 297.0, "calib/step_q_gap": -0.013587459292377313, "calib/step_q_w": 0.3380655737704918, "calib/step_q_w_n": 1525.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1290.0, "completions/max_terminated_length": 1290.0, "completions/mean_length": 286.76953125, "completions/mean_terminated_length": 287.8941345214844, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.11093333333333333, "grad_norm": 0.0023796723689883947, "learning_rate": 2.666666666666667e-06, "loss": 0.1023, "num_tokens": 22720149.0, "reward": 0.9960923790931702, "reward_std": 0.01105241384357214, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8242160081863403, "rewards/format_reward_step": 0.99609375, "step": 104 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.868424192070961, "aux_distill/mean_u": 0.32407661465809195, "aux_distill/n_active_tok": 222.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48873873873873874, "calib/avg_num_step_conf": 7.3125, "calib/ece": 0.12233201581027668, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00022522522522522523, "calib/mean_conf": 0.0001976284584980237, "calib/mu_c": 0.0, "calib/mu_w": 0.00022522522522522523, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0013918432301706727, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2927948453608248, "calib/step_q_c_n": 194.0, "calib/step_q_gap": -0.03498703783345414, "calib/step_q_w": 0.3277818831942789, "calib/step_q_w_n": 1678.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2293.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 286.6796875, "completions/mean_terminated_length": 287.8039245605469, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.112, "grad_norm": 0.0025983734522014856, "learning_rate": 2.6388888888888893e-06, "loss": 0.1346, "num_tokens": 22923107.0, "reward": 0.9882802963256836, "reward_std": 0.03314775973558426, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8671855926513672, "rewards/format_reward_step": 0.98828125, "step": 105 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8548662774264812, "aux_distill/mean_u": 0.3412352244089931, "aux_distill/n_active_tok": 195.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48, "calib/avg_num_step_conf": 6.10546875, "calib/ece": 0.11729411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00039999999999999996, "calib/mean_conf": 0.0003529411764705882, "calib/mu_c": 0.0, "calib/mu_w": 0.00039999999999999996, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0018452220166303669, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3631176470588235, "calib/step_q_c_n": 170.0, "calib/step_q_gap": 0.06649725940627504, "calib/step_q_w": 0.29662038765254845, "calib/step_q_w_n": 1393.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2404.0, "completions/max_terminated_length": 2404.0, "completions/mean_length": 262.546875, "completions/mean_terminated_length": 262.546875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.11306666666666666, "grad_norm": 0.0020745107904076576, "learning_rate": 2.6111111111111113e-06, "loss": 0.1196, "num_tokens": 23118711.0, "reward": 0.9960920214653015, "reward_std": 0.011053518392145634, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.8789027333259583, "rewards/format_reward_step": 0.99609375, "step": 106 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8563811108469963, "aux_distill/mean_u": 0.358172699943934, "aux_distill/n_active_tok": 218.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5149049539170507, "calib/avg_num_step_conf": 6.8359375, "calib/ece": 0.12086274509803921, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002980990783410138, "calib/mean_conf": 0.0007058823529411764, "calib/mu_c": 0.000967741935483871, "calib/mu_w": 0.0006696428571428571, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0025613577714208515, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2366346153846154, "calib/step_q_c_n": 208.0, "calib/step_q_gap": -0.10943451561408757, "calib/step_q_w": 0.34606913099870296, "calib/step_q_w_n": 1542.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 980.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 277.1171875, "completions/mean_terminated_length": 277.1171875, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.11413333333333334, "grad_norm": 0.0028200845699757338, "learning_rate": 2.5833333333333337e-06, "loss": 0.1, "num_tokens": 23318077.0, "reward": 0.9962074756622314, "reward_std": 0.01130930706858635, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8752273321151733, "rewards/format_reward_step": 0.99609375, "step": 107 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8544626701623201, "aux_distill/mean_u": 0.3307901570017504, "aux_distill/n_active_tok": 203.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4869109947643979, "calib/avg_num_step_conf": 6.42578125, "calib/ece": 0.2537109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002617801047120419, "calib/mean_conf": 0.0001953125, "calib/mu_c": 0.0, "calib/mu_w": 0.0002617801047120419, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0013838273112436214, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29852248520710056, "calib/step_q_c_n": 338.0, "calib/step_q_gap": 0.005442990180321683, "calib/step_q_w": 0.2930794950267789, "calib/step_q_w_n": 1307.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1036.0, "completions/max_terminated_length": 1036.0, "completions/mean_length": 258.140625, "completions/mean_terminated_length": 259.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.1152, "grad_norm": 0.0021364479325711727, "learning_rate": 2.5555555555555557e-06, "loss": 0.0865, "num_tokens": 23511201.0, "reward": 0.9999990463256836, "reward_std": 2.764005785138579e-06, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7460918426513672, "rewards/format_reward_step": 1.0, "step": 108 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8432301990687847, "aux_distill/mean_u": 0.3266780022049978, "aux_distill/n_active_tok": 221.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4978860703159768, "calib/avg_num_step_conf": 7.0, "calib/ece": 0.1637890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -4.227859368046284e-05, "calib/mean_conf": 0.00027343749999999997, "calib/mu_c": 0.0002380952380952381, "calib/mu_w": 0.00028037383177570094, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0016308301363396956, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.34582068965517243, "calib/step_q_c_n": 232.0, "calib/step_q_gap": -0.007160079575596778, "calib/step_q_w": 0.3529807692307692, "calib/step_q_w_n": 1560.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1145.0, "completions/max_terminated_length": 1145.0, "completions/mean_length": 277.08984375, "completions/mean_terminated_length": 278.1764831542969, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.11626666666666667, "grad_norm": 0.002132768277078867, "learning_rate": 2.5277777777777778e-06, "loss": 0.0849, "num_tokens": 23710544.0, "reward": 1.000037670135498, "reward_std": 0.00011277615703875199, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8360128998756409, "rewards/format_reward_step": 1.0, "step": 109 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8624531216919422, "aux_distill/mean_u": 0.3428116708899324, "aux_distill/n_active_tok": 202.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5102040816326531, "calib/avg_num_step_conf": 6.3515625, "calib/ece": 0.1913671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00020408163265306123, "calib/mean_conf": 3.90625e-05, "calib/mu_c": 0.00020408163265306123, "calib/mu_w": 0.0, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.000623778102448098, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3759057971014493, "calib/step_q_c_n": 276.0, "calib/step_q_gap": 0.062253648953301155, "calib/step_q_w": 0.3136521481481481, "calib/step_q_w_n": 1350.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 688.0, "completions/max_terminated_length": 688.0, "completions/mean_length": 261.16015625, "completions/mean_terminated_length": 262.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.11733333333333333, "grad_norm": 0.003013479057699442, "learning_rate": 2.5e-06, "loss": 0.0906, "num_tokens": 23906129.0, "reward": 0.9961326122283936, "reward_std": 0.0111584747210145, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8047652244567871, "rewards/format_reward_step": 0.99609375, "step": 110 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8441929463297129, "aux_distill/mean_u": 0.3441107825296833, "aux_distill/n_active_tok": 198.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5240320427236315, "calib/avg_num_step_conf": 6.21875, "calib/ece": 0.16375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004806408544726302, "calib/mean_conf": 0.0003125, "calib/mu_c": 0.0007142857142857143, "calib/mu_w": 0.00023364485981308412, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.001739926363384382, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3002446351931331, "calib/step_q_c_n": 233.0, "calib/step_q_gap": -0.02867736627853723, "calib/step_q_w": 0.32892200147167033, "calib/step_q_w_n": 1359.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 701.0, "completions/max_terminated_length": 701.0, "completions/mean_length": 261.015625, "completions/mean_terminated_length": 262.03924560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.1184, "grad_norm": 0.0034395901020616293, "learning_rate": 2.4722222222222226e-06, "loss": 0.0851, "num_tokens": 24104165.0, "reward": 0.9923031330108643, "reward_std": 0.022335954010486603, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8283562660217285, "rewards/format_reward_step": 0.9921875, "step": 111 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8398895040154457, "aux_distill/mean_u": 0.35261902089203984, "aux_distill/n_active_tok": 203.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5112972555935192, "calib/avg_num_step_conf": 6.390625, "calib/ece": 0.1689763779527559, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00018075608949630773, "calib/mean_conf": 0.00031496062992125983, "calib/mu_c": 0.00046511627906976747, "calib/mu_w": 0.00028436018957345974, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001959032331436965, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3352569169960474, "calib/step_q_c_n": 253.0, "calib/step_q_gap": 0.046753663199952, "calib/step_q_w": 0.2885032537960954, "calib/step_q_w_n": 1383.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 858.0, "completions/max_terminated_length": 858.0, "completions/mean_length": 260.8125, "completions/mean_terminated_length": 261.8352966308594, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.11946666666666667, "grad_norm": 0.003930776380002499, "learning_rate": 2.4444444444444447e-06, "loss": 0.0745, "num_tokens": 24302661.0, "reward": 0.994216799736023, "reward_std": 0.01679299585521221, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8243710994720459, "rewards/format_reward_step": 0.9921875, "step": 112 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8919267151504755, "aux_distill/mean_u": 0.3533143225373007, "aux_distill/n_active_tok": 191.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.493953488372093, "calib/avg_num_step_conf": 5.984375, "calib/ece": 0.15653333333333333, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.418604651162795e-05, "calib/mean_conf": 0.0003294117647058824, "calib/mu_c": 0.00025, "calib/mu_w": 0.00034418604651162795, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001758262747507709, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3289644549763033, "calib/step_q_c_n": 211.0, "calib/step_q_gap": -0.02367498484201619, "calib/step_q_w": 0.35263943981831947, "calib/step_q_w_n": 1321.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2043.0, "completions/max_terminated_length": 2043.0, "completions/mean_length": 248.12890625, "completions/mean_terminated_length": 248.12890625, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.12053333333333334, "grad_norm": 0.002600407926365733, "learning_rate": 2.4166666666666667e-06, "loss": 0.125, "num_tokens": 24495190.0, "reward": 0.996131181716919, "reward_std": 0.011162434704601765, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8399186730384827, "rewards/format_reward_step": 0.99609375, "step": 113 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.861085245385766, "aux_distill/mean_u": 0.32562836972033066, "aux_distill/n_active_tok": 182.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5092592592592593, "calib/avg_num_step_conf": 5.69140625, "calib/ece": 0.14062745098039214, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 7.610350076103431e-06, "calib/mean_conf": 0.0005490196078431374, "calib/mu_c": 0.0005555555555555556, "calib/mu_w": 0.0005479452054794521, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.003267869279372495, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.33482926829268295, "calib/step_q_c_n": 205.0, "calib/step_q_gap": -0.013357712537988009, "calib/step_q_w": 0.34818698083067096, "calib/step_q_w_n": 1252.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2350.0, "completions/max_terminated_length": 2350.0, "completions/mean_length": 249.95703125, "completions/mean_terminated_length": 249.95703125, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.1216, "grad_norm": 0.0028793588280677795, "learning_rate": 2.388888888888889e-06, "loss": 0.14, "num_tokens": 24688011.0, "reward": 0.9961664080619812, "reward_std": 0.011282390914857388, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8556140661239624, "rewards/format_reward_step": 0.99609375, "step": 114 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8542553428560495, "aux_distill/mean_u": 0.3127482086279962, "aux_distill/n_active_tok": 194.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5236378205128205, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.1866796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010416666666666669, "calib/mean_conf": 0.0008203125000000001, "calib/mu_c": 0.0016666666666666668, "calib/mu_w": 0.000625, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0036054073836868632, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.328165543071161, "calib/step_q_c_n": 267.0, "calib/step_q_gap": 0.008849874868396057, "calib/step_q_w": 0.31931566820276497, "calib/step_q_w_n": 1302.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 644.0, "completions/max_terminated_length": 644.0, "completions/mean_length": 248.51953125, "completions/mean_terminated_length": 249.49412536621094, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.12266666666666666, "grad_norm": 0.00394708476960659, "learning_rate": 2.361111111111111e-06, "loss": 0.071, "num_tokens": 24880704.0, "reward": 0.9963996410369873, "reward_std": 0.011927313171327114, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8092054128646851, "rewards/format_reward_step": 0.99609375, "step": 115 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8351747281849384, "aux_distill/mean_u": 0.3194099311432214, "aux_distill/n_active_tok": 229.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5019817073170731, "calib/avg_num_step_conf": 7.16015625, "calib/ece": 0.1891304347826087, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 3.963414634146345e-05, "calib/mean_conf": 0.0005928853754940711, "calib/mu_c": 0.000625, "calib/mu_w": 0.0005853658536585366, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0023616394065280088, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28326771653543303, "calib/step_q_c_n": 254.0, "calib/step_q_gap": -0.04504482304658097, "calib/step_q_w": 0.328312539582014, "calib/step_q_w_n": 1579.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2653.0, "completions/max_terminated_length": 2653.0, "completions/mean_length": 287.15625, "completions/mean_terminated_length": 287.15625, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.12373333333333333, "grad_norm": 0.0038886968977749348, "learning_rate": 2.3333333333333336e-06, "loss": 0.126, "num_tokens": 25082544.0, "reward": 0.9825363159179688, "reward_std": 0.04223396256566048, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.7931976318359375, "rewards/format_reward_step": 0.98046875, "step": 116 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8338127043098211, "aux_distill/mean_u": 0.28180083732390127, "aux_distill/n_active_tok": 213.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5284824548078383, "calib/avg_num_step_conf": 6.73046875, "calib/ece": 0.11273046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009343764241227404, "calib/mean_conf": 0.0005507812500000001, "calib/mu_c": 0.0013793103448275863, "calib/mu_w": 0.00044493392070484586, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0024393741542962284, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.34746341463414643, "calib/step_q_c_n": 205.0, "calib/step_q_gap": 0.037617367203316376, "calib/step_q_w": 0.30984604743083005, "calib/step_q_w_n": 1518.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 650.0, "completions/max_terminated_length": 650.0, "completions/mean_length": 268.328125, "completions/mean_terminated_length": 269.3804016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.1248, "grad_norm": 0.0032077496871352196, "learning_rate": 2.305555555555556e-06, "loss": 0.0736, "num_tokens": 25281644.0, "reward": 1.0001530647277832, "reward_std": 0.0004437759052962065, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.887024998664856, "rewards/format_reward_step": 1.0, "step": 117 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8412914387881756, "aux_distill/mean_u": 0.28512050657689647, "aux_distill/n_active_tok": 187.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4725, "calib/avg_num_step_conf": 5.921875, "calib/ece": 0.2183203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00055, "calib/mean_conf": 0.0004296875, "calib/mu_c": 0.0, "calib/mu_w": 0.00055, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002027866773815221, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27316613418530356, "calib/step_q_c_n": 313.0, "calib/step_q_gap": -0.02819088992109714, "calib/step_q_w": 0.3013570241064007, "calib/step_q_w_n": 1203.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 631.0, "completions/max_terminated_length": 631.0, "completions/mean_length": 249.84375, "completions/mean_terminated_length": 250.82354736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.12586666666666665, "grad_norm": 0.0025783516466617584, "learning_rate": 2.277777777777778e-06, "loss": 0.0849, "num_tokens": 25473420.0, "reward": 0.9999978542327881, "reward_std": 5.317016984918155e-06, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.7812457084655762, "rewards/format_reward_step": 1.0, "step": 118 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9055172242224216, "aux_distill/mean_u": 0.35845761656511016, "aux_distill/n_active_tok": 194.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5418560606060605, "calib/avg_num_step_conf": 6.1328125, "calib/ece": 0.1398828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007525252525252525, "calib/mean_conf": 0.0007421875, "calib/mu_c": 0.001388888888888889, "calib/mu_w": 0.0006363636363636364, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0030355860578879573, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.300391061452514, "calib/step_q_c_n": 179.0, "calib/step_q_gap": 0.007846848655964744, "calib/step_q_w": 0.29254421279654924, "calib/step_q_w_n": 1391.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 877.0, "completions/max_terminated_length": 877.0, "completions/mean_length": 263.62890625, "completions/mean_terminated_length": 264.6627502441406, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.12693333333333334, "grad_norm": 0.004290780518203974, "learning_rate": 2.25e-06, "loss": 0.1046, "num_tokens": 25669781.0, "reward": 1.0001903772354126, "reward_std": 0.000559728650841862, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8597558736801147, "rewards/format_reward_step": 1.0, "step": 119 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.873245045542717, "aux_distill/mean_u": 0.33521150432676294, "aux_distill/n_active_tok": 182.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5024099441907661, "calib/avg_num_step_conf": 5.69140625, "calib/ece": 0.14054901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -8.371385083713861e-05, "calib/mean_conf": 0.000627450980392157, "calib/mu_c": 0.0005555555555555556, "calib/mu_w": 0.0006392694063926942, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003003010152012023, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3134090909090909, "calib/step_q_c_n": 176.0, "calib/step_q_gap": -0.009096763891845827, "calib/step_q_w": 0.32250585480093674, "calib/step_q_w_n": 1281.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2675.0, "completions/max_terminated_length": 2675.0, "completions/mean_length": 248.625, "completions/mean_terminated_length": 248.625, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.128, "grad_norm": 0.0037139912601560354, "learning_rate": 2.222222222222222e-06, "loss": 0.1296, "num_tokens": 25863925.0, "reward": 0.9961671829223633, "reward_std": 0.0112801818177104, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8556156158447266, "rewards/format_reward_step": 0.99609375, "step": 120 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8498683087527752, "aux_distill/mean_u": 0.3374592906505771, "aux_distill/n_active_tok": 214.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4713458584426326, "calib/avg_num_step_conf": 6.73828125, "calib/ece": 0.1516015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005730828311473473, "calib/mean_conf": 0.0007421875, "calib/mu_c": 0.0002564102564102564, "calib/mu_w": 0.0008294930875576037, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0026212654796574403, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3189316239316239, "calib/step_q_c_n": 234.0, "calib/step_q_gap": 0.015775285903454894, "calib/step_q_w": 0.303156338028169, "calib/step_q_w_n": 1491.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 715.0, "completions/max_terminated_length": 715.0, "completions/mean_length": 270.0703125, "completions/mean_terminated_length": 271.1294250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.12906666666666666, "grad_norm": 0.0026788460090756416, "learning_rate": 2.1944444444444445e-06, "loss": 0.0897, "num_tokens": 26061927.0, "reward": 1.000035285949707, "reward_std": 0.00011826898844446987, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8477269411087036, "rewards/format_reward_step": 1.0, "step": 121 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8673976100981236, "aux_distill/mean_u": 0.3998442732842434, "aux_distill/n_active_tok": 191.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5096590909090909, "calib/avg_num_step_conf": 6.01171875, "calib/ece": 0.139921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0001515151515151514, "calib/mean_conf": 0.000703125, "calib/mu_c": 0.0008333333333333333, "calib/mu_w": 0.0006818181818181819, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0027052015145594974, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29958333333333337, "calib/step_q_c_n": 192.0, "calib/step_q_gap": 0.029442279138827077, "calib/step_q_w": 0.2701410541945063, "calib/step_q_w_n": 1347.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 707.0, "completions/max_terminated_length": 707.0, "completions/mean_length": 250.0, "completions/mean_terminated_length": 250.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.13013333333333332, "grad_norm": 0.004579862579703331, "learning_rate": 2.166666666666667e-06, "loss": 0.0859, "num_tokens": 26257079.0, "reward": 1.0001132488250732, "reward_std": 0.00033672014251351357, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8596014976501465, "rewards/format_reward_step": 1.0, "step": 122 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8707848694175482, "aux_distill/mean_u": 0.35850991461610887, "aux_distill/n_active_tok": 193.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5176339285714285, "calib/avg_num_step_conf": 6.0859375, "calib/ece": 0.2180859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005214285714285714, "calib/mean_conf": 0.0006640624999999999, "calib/mu_c": 0.0010714285714285715, "calib/mu_w": 0.00055, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002642138527044665, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28770392749244716, "calib/step_q_c_n": 331.0, "calib/step_q_gap": -0.0071442387667215446, "calib/step_q_w": 0.2948481662591687, "calib/step_q_w_n": 1227.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 834.0, "completions/max_terminated_length": 834.0, "completions/mean_length": 258.71484375, "completions/mean_terminated_length": 259.72943115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.1312, "grad_norm": 0.00448179617524147, "learning_rate": 2.138888888888889e-06, "loss": 0.0866, "num_tokens": 26452406.0, "reward": 1.0002305507659912, "reward_std": 0.0006628651753999293, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.7817113399505615, "rewards/format_reward_step": 1.0, "step": 123 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8901912048459053, "aux_distill/mean_u": 0.2978366075901536, "aux_distill/n_active_tok": 171.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4705912438625205, "calib/avg_num_step_conf": 5.37890625, "calib/ece": 0.18329411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007283142389525369, "calib/mean_conf": 0.001019607843137255, "calib/mu_c": 0.000425531914893617, "calib/mu_w": 0.001153846153846154, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003506242030942292, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28164502164502164, "calib/step_q_c_n": 231.0, "calib/step_q_gap": -0.03583316334625236, "calib/step_q_w": 0.317478184991274, "calib/step_q_w_n": 1146.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 548.0, "completions/max_terminated_length": 548.0, "completions/mean_length": 234.97265625, "completions/mean_terminated_length": 235.89413452148438, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.13226666666666667, "grad_norm": 0.0037744231522083282, "learning_rate": 2.1111111111111114e-06, "loss": 0.0938, "num_tokens": 26643183.0, "reward": 0.9981184005737305, "reward_std": 0.005757684353739023, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8126429915428162, "rewards/format_reward_step": 0.99609375, "step": 124 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8706583678722382, "aux_distill/mean_u": 0.3025617848825858, "aux_distill/n_active_tok": 168.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4990793780687397, "calib/avg_num_step_conf": 5.32421875, "calib/ece": 0.18341176470588236, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -6.239770867430441e-05, "calib/mean_conf": 0.0009019607843137254, "calib/mu_c": 0.000851063829787234, "calib/mu_w": 0.0009134615384615384, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002998397624096514, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33955284552845527, "calib/step_q_c_n": 246.0, "calib/step_q_gap": 0.035713991455044336, "calib/step_q_w": 0.30383885407341094, "calib/step_q_w_n": 1117.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 762.0, "completions/max_terminated_length": 762.0, "completions/mean_length": 235.75, "completions/mean_terminated_length": 236.67453002929688, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.13333333333333333, "grad_norm": 0.005619540344923735, "learning_rate": 2.0833333333333334e-06, "loss": 0.1079, "num_tokens": 26832151.0, "reward": 0.9845265746116638, "reward_std": 0.04456701502203941, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8010843396186829, "rewards/format_reward_step": 0.984375, "step": 125 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8616320062428713, "aux_distill/mean_u": 0.3353064855944266, "aux_distill/n_active_tok": 185.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49109924343569206, "calib/avg_num_step_conf": 5.83203125, "calib/ece": 0.1634375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00017801513128615936, "calib/mean_conf": 0.0006250000000000001, "calib/mu_c": 0.0004761904761904762, "calib/mu_w": 0.0006542056074766356, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0024206145913796356, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3123214285714285, "calib/step_q_c_n": 224.0, "calib/step_q_gap": -0.019685663627153016, "calib/step_q_w": 0.33200709219858154, "calib/step_q_w_n": 1269.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 610.0, "completions/max_terminated_length": 610.0, "completions/mean_length": 248.95703125, "completions/mean_terminated_length": 249.933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.1344, "grad_norm": 0.0028145809192210436, "learning_rate": 2.0555555555555555e-06, "loss": 0.0784, "num_tokens": 27025156.0, "reward": 1.0000749826431274, "reward_std": 0.00022589563741348684, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8360875248908997, "rewards/format_reward_step": 1.0, "step": 126 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8615944497287273, "aux_distill/mean_u": 0.3128397979115316, "aux_distill/n_active_tok": 171.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4927832830676432, "calib/avg_num_step_conf": 5.359375, "calib/ece": 0.17175686274509805, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00035308056872037914, "calib/mean_conf": 0.000792156862745098, "calib/mu_c": 0.0005, "calib/mu_w": 0.0008530805687203792, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0028311878707898927, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.30305699481865284, "calib/step_q_c_n": 193.0, "calib/step_q_gap": -0.027531385164383637, "calib/step_q_w": 0.3305883799830365, "calib/step_q_w_n": 1179.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2177.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 234.6328125, "completions/mean_terminated_length": 234.6328125, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.13546666666666668, "grad_norm": 0.004149171058088541, "learning_rate": 2.027777777777778e-06, "loss": 0.1157, "num_tokens": 27212702.0, "reward": 0.9922691583633423, "reward_std": 0.022253649309277534, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8204757571220398, "rewards/format_reward_step": 0.9921875, "step": 127 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.896057328209281, "aux_distill/mean_u": 0.3779112387709184, "aux_distill/n_active_tok": 167.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4755381604696673, "calib/avg_num_step_conf": 5.25390625, "calib/ece": 0.13710905511811025, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00046451402478799736, "calib/mean_conf": 0.000686220472440945, "calib/mu_c": 0.00028571428571428574, "calib/mu_w": 0.0007502283105022831, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002508946018674027, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2608588957055215, "calib/step_q_c_n": 163.0, "calib/step_q_gap": -0.025587804802092717, "calib/step_q_w": 0.2864467005076142, "calib/step_q_w_n": 1182.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 664.0, "completions/max_terminated_length": 664.0, "completions/mean_length": 235.07421875, "completions/mean_terminated_length": 235.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.13653333333333334, "grad_norm": 0.0037468273658305407, "learning_rate": 2.0000000000000003e-06, "loss": 0.0824, "num_tokens": 27403353.0, "reward": 0.9922232627868652, "reward_std": 0.022215476259589195, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8555401563644409, "rewards/format_reward_step": 0.9921875, "step": 128 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.896693766117096, "aux_distill/mean_u": 0.3474992860126997, "aux_distill/n_active_tok": 174.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5337232264924806, "calib/avg_num_step_conf": 5.6015625, "calib/ece": 0.112483203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006555217985720797, "calib/mean_conf": 0.0007980468749999999, "calib/mu_c": 0.0013793103448275863, "calib/mu_w": 0.0007237885462555066, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0028335662596457377, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33808641975308645, "calib/step_q_c_n": 162.0, "calib/step_q_gap": 0.025081702771954373, "calib/step_q_w": 0.3130047169811321, "calib/step_q_w_n": 1272.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 875.0, "completions/max_terminated_length": 875.0, "completions/mean_length": 232.25390625, "completions/mean_terminated_length": 233.1647186279297, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1376, "grad_norm": 0.003721039043739438, "learning_rate": 1.9722222222222224e-06, "loss": 0.0916, "num_tokens": 27589002.0, "reward": 1.0001518726348877, "reward_std": 0.00037261395482346416, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.8870225548744202, "rewards/format_reward_step": 1.0, "step": 129 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.866528145968914, "aux_distill/mean_u": 0.3317948903451359, "aux_distill/n_active_tok": 172.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5101449275362319, "calib/avg_num_step_conf": 5.41796875, "calib/ece": 0.17891796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00012194616977225675, "calib/mean_conf": 0.00076953125, "calib/mu_c": 0.0008695652173913044, "calib/mu_w": 0.0007476190476190476, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0030602047162360617, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3190350877192983, "calib/step_q_c_n": 228.0, "calib/step_q_gap": -0.0020002876042564677, "calib/step_q_w": 0.32103537532355475, "calib/step_q_w_n": 1159.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 554.0, "completions/max_terminated_length": 554.0, "completions/mean_length": 229.515625, "completions/mean_terminated_length": 230.4156951904297, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.13866666666666666, "grad_norm": 0.0038857655599713326, "learning_rate": 1.944444444444445e-06, "loss": 0.0782, "num_tokens": 27776854.0, "reward": 1.00015127658844, "reward_std": 0.00044980537495575845, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8206150531768799, "rewards/format_reward_step": 1.0, "step": 130 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8693040069192648, "aux_distill/mean_u": 0.31701613274923535, "aux_distill/n_active_tok": 191.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5135494987468672, "calib/avg_num_step_conf": 5.98046875, "calib/ece": 0.10850390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006259398496240602, "calib/mean_conf": 0.00087109375, "calib/mu_c": 0.0014285714285714286, "calib/mu_w": 0.0008026315789473684, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.002941318059766903, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.32202797202797206, "calib/step_q_c_n": 143.0, "calib/step_q_gap": -0.006143737866360299, "calib/step_q_w": 0.32817170989433236, "calib/step_q_w_n": 1388.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 578.0, "completions/max_terminated_length": 578.0, "completions/mean_length": 233.84765625, "completions/mean_terminated_length": 234.7647247314453, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.13973333333333332, "grad_norm": 0.004119996912777424, "learning_rate": 1.916666666666667e-06, "loss": 0.0873, "num_tokens": 27966735.0, "reward": 0.9962453246116638, "reward_std": 0.011495009064674377, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.8870218396186829, "rewards/format_reward_step": 0.99609375, "step": 131 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8582376260310411, "aux_distill/mean_u": 0.368528070301397, "aux_distill/n_active_tok": 165.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49205236091631605, "calib/avg_num_step_conf": 5.1875, "calib/ece": 0.2699607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00026414212248714356, "calib/mean_conf": 0.0006274509803921569, "calib/mu_c": 0.00043478260869565214, "calib/mu_w": 0.0006989247311827957, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0028694522556747793, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29436666666666667, "calib/step_q_c_n": 300.0, "calib/step_q_gap": -0.045698508430609575, "calib/step_q_w": 0.34006517509727624, "calib/step_q_w_n": 1028.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 546.0, "completions/max_terminated_length": 546.0, "completions/mean_length": 222.0234375, "completions/mean_terminated_length": 222.89413452148438, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.1408, "grad_norm": 0.004884951747953892, "learning_rate": 1.888888888888889e-06, "loss": 0.0881, "num_tokens": 28152973.0, "reward": 0.9962066411972046, "reward_std": 0.011387603357434273, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7267882823944092, "rewards/format_reward_step": 0.99609375, "step": 132 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8835033662617207, "aux_distill/mean_u": 0.35925395969114193, "aux_distill/n_active_tok": 184.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5800740740740742, "calib/avg_num_step_conf": 5.84765625, "calib/ece": 0.11666666666666667, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015333333333333336, "calib/mean_conf": 0.000980392156862745, "calib/mu_c": 0.0023333333333333335, "calib/mu_w": 0.0007999999999999999, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.003226667047956739, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3530057803468208, "calib/step_q_c_n": 173.0, "calib/step_q_gap": 0.008497321132319324, "calib/step_q_w": 0.34450845921450146, "calib/step_q_w_n": 1324.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 559.0, "completions/max_terminated_length": 559.0, "completions/mean_length": 238.609375, "completions/mean_terminated_length": 239.54510498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.14186666666666667, "grad_norm": 0.00523751974105835, "learning_rate": 1.8611111111111113e-06, "loss": 0.0948, "num_tokens": 28344209.0, "reward": 0.992455244064331, "reward_std": 0.022799167782068253, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.8755354881286621, "rewards/format_reward_step": 0.9921875, "step": 133 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8557330705225468, "aux_distill/mean_u": 0.3506175068699858, "aux_distill/n_active_tok": 211.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49714787187362874, "calib/avg_num_step_conf": 6.6015625, "calib/ece": 0.16788235294117648, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -5.704256252742426e-05, "calib/mean_conf": 0.0007450980392156863, "calib/mu_c": 0.0006976744186046512, "calib/mu_w": 0.0007547169811320754, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0026259873008287384, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32928464419475656, "calib/step_q_c_n": 267.0, "calib/step_q_gap": 0.025691109409092472, "calib/step_q_w": 0.3035935347856641, "calib/step_q_w_n": 1423.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2884.0, "completions/max_terminated_length": 2884.0, "completions/mean_length": 266.01171875, "completions/mean_terminated_length": 266.01171875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.14293333333333333, "grad_norm": 0.003346212673932314, "learning_rate": 1.8333333333333333e-06, "loss": 0.1081, "num_tokens": 28545068.0, "reward": 0.9962071776390076, "reward_std": 0.011385188437998295, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8283519744873047, "rewards/format_reward_step": 0.99609375, "step": 134 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9089143630117178, "aux_distill/mean_u": 0.32230048849550913, "aux_distill/n_active_tok": 173.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5025172231054584, "calib/avg_num_step_conf": 5.43359375, "calib/ece": 0.13230078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 8.823529411764721e-05, "calib/mean_conf": 0.00051171875, "calib/mu_c": 0.0005882352941176471, "calib/mu_w": 0.0004999999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0021954897337264955, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29726775956284146, "calib/step_q_c_n": 183.0, "calib/step_q_gap": -0.004379591430536012, "calib/step_q_w": 0.30164735099337747, "calib/step_q_w_n": 1208.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 719.0, "completions/max_terminated_length": 719.0, "completions/mean_length": 240.73828125, "completions/mean_terminated_length": 241.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.144, "grad_norm": 0.004149943124502897, "learning_rate": 1.8055555555555557e-06, "loss": 0.0905, "num_tokens": 28736385.0, "reward": 0.9961693286895752, "reward_std": 0.011272847652435303, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8634324073791504, "rewards/format_reward_step": 0.99609375, "step": 135 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8809623774141073, "aux_distill/mean_u": 0.3241061942736305, "aux_distill/n_active_tok": 183.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5138349514563106, "calib/avg_num_step_conf": 5.75, "calib/ece": 0.1945546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003009708737864077, "calib/mean_conf": 0.0007578125, "calib/mu_c": 0.001, "calib/mu_w": 0.0006990291262135923, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002628705996273404, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30015267175572524, "calib/step_q_c_n": 262.0, "calib/step_q_gap": -0.03536790675667145, "calib/step_q_w": 0.3355205785123967, "calib/step_q_w_n": 1210.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 537.0, "completions/max_terminated_length": 537.0, "completions/mean_length": 235.9609375, "completions/mean_terminated_length": 236.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.14506666666666668, "grad_norm": 0.003267818596214056, "learning_rate": 1.777777777777778e-06, "loss": 0.0877, "num_tokens": 28929087.0, "reward": 1.000191569328308, "reward_std": 0.00047984960838221014, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8050706386566162, "rewards/format_reward_step": 1.0, "step": 136 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8728037420660257, "aux_distill/mean_u": 0.37050528594314014, "aux_distill/n_active_tok": 176.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5345982142857143, "calib/avg_num_step_conf": 5.55078125, "calib/ece": 0.12390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012500000000000002, "calib/mean_conf": 0.00109375, "calib/mu_c": 0.0021875, "calib/mu_w": 0.0009375000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0035869500885153116, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3929787234042554, "calib/step_q_c_n": 141.0, "calib/step_q_gap": 0.051128723404255394, "calib/step_q_w": 0.34185, "calib/step_q_w_n": 1280.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 791.0, "completions/max_terminated_length": 791.0, "completions/mean_length": 237.84765625, "completions/mean_terminated_length": 238.78041076660156, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.14613333333333334, "grad_norm": 0.004397119395434856, "learning_rate": 1.75e-06, "loss": 0.0846, "num_tokens": 29120768.0, "reward": 0.996360182762146, "reward_std": 0.011717451736330986, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.8716264963150024, "rewards/format_reward_step": 0.99609375, "step": 137 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8349598366767168, "aux_distill/mean_u": 0.2957780477687902, "aux_distill/n_active_tok": 177.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4908908574293832, "calib/avg_num_step_conf": 5.53515625, "calib/ece": 0.24215686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00022981781714858762, "calib/mean_conf": 0.000980392156862745, "calib/mu_c": 0.0008064516129032258, "calib/mu_w": 0.0010362694300518134, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003102751442329801, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3777852348993288, "calib/step_q_c_n": 298.0, "calib/step_q_gap": 0.015041445801920406, "calib/step_q_w": 0.3627437890974084, "calib/step_q_w_n": 1119.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 519.0, "completions/max_terminated_length": 519.0, "completions/mean_length": 222.1328125, "completions/mean_terminated_length": 223.00393676757812, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.1472, "grad_norm": 0.004196003545075655, "learning_rate": 1.7222222222222224e-06, "loss": 0.0751, "num_tokens": 29305778.0, "reward": 0.996283769607544, "reward_std": 0.011531622149050236, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7542863488197327, "rewards/format_reward_step": 0.99609375, "step": 138 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8856425620615482, "aux_distill/mean_u": 0.3184689657209526, "aux_distill/n_active_tok": 159.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5523317435082141, "calib/avg_num_step_conf": 5.015625, "calib/ece": 0.1313671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010466348701642816, "calib/mean_conf": 0.0014453125, "calib/mu_c": 0.002352941176470588, "calib/mu_w": 0.0013063063063063064, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0035162759813961914, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.4230817610062893, "calib/step_q_c_n": 159.0, "calib/step_q_gap": 0.05747598322851155, "calib/step_q_w": 0.36560577777777775, "calib/step_q_w_n": 1125.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 549.0, "completions/max_terminated_length": 549.0, "completions/mean_length": 211.58203125, "completions/mean_terminated_length": 212.41177368164062, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.14826666666666666, "grad_norm": 0.004312260076403618, "learning_rate": 1.6944444444444446e-06, "loss": 0.0947, "num_tokens": 29486847.0, "reward": 1.0003052949905396, "reward_std": 0.0006617589388042688, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8677979707717896, "rewards/format_reward_step": 1.0, "step": 139 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8976630344986916, "aux_distill/mean_u": 0.3117378416124111, "aux_distill/n_active_tok": 159.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4712398585396297, "calib/avg_num_step_conf": 5.0078125, "calib/ece": 0.17905098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003101726648637403, "calib/mean_conf": 0.0013411764705882354, "calib/mu_c": 0.0010869565217391304, "calib/mu_w": 0.0013971291866028707, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.003510945153490569, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2988267326732673, "calib/step_q_c_n": 202.0, "calib/step_q_gap": -0.02715474880821417, "calib/step_q_w": 0.3259814814814815, "calib/step_q_w_n": 1080.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 488.0, "completions/max_terminated_length": 488.0, "completions/mean_length": 210.6484375, "completions/mean_terminated_length": 211.47451782226562, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.14933333333333335, "grad_norm": 0.005486528854817152, "learning_rate": 1.6666666666666667e-06, "loss": 0.0883, "num_tokens": 29669597.0, "reward": 0.9923757910728455, "reward_std": 0.022655244916677475, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8128765821456909, "rewards/format_reward_step": 0.9921875, "step": 140 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.851903609931469, "aux_distill/mean_u": 0.3203507632368131, "aux_distill/n_active_tok": 169.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5276381909547738, "calib/avg_num_step_conf": 5.35546875, "calib/ece": 0.2209765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007731640659437539, "calib/mean_conf": 0.0016796875, "calib/mu_c": 0.002280701754385965, "calib/mu_w": 0.0015075376884422112, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004135278092504027, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3629182879377432, "calib/step_q_c_n": 257.0, "calib/step_q_gap": 0.04536891630399098, "calib/step_q_w": 0.31754937163375224, "calib/step_q_w_n": 1114.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 565.0, "completions/max_terminated_length": 565.0, "completions/mean_length": 225.58203125, "completions/mean_terminated_length": 226.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.1504, "grad_norm": 0.0052869487553834915, "learning_rate": 1.638888888888889e-06, "loss": 0.1107, "num_tokens": 29858250.0, "reward": 1.000497817993164, "reward_std": 0.0012639259221032262, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7783395051956177, "rewards/format_reward_step": 1.0, "step": 141 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8793974351137877, "aux_distill/mean_u": 0.32945197865296405, "aux_distill/n_active_tok": 163.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5704353772509688, "calib/avg_num_step_conf": 5.171875, "calib/ece": 0.15894117647058825, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015819466605881012, "calib/mean_conf": 0.001843137254901961, "calib/mu_c": 0.0031707317073170734, "calib/mu_w": 0.0015887850467289721, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004074658888740561, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3137755102040816, "calib/step_q_c_n": 196.0, "calib/step_q_gap": -0.01651704298740775, "calib/step_q_w": 0.33029255319148937, "calib/step_q_w_n": 1128.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 682.0, "completions/max_terminated_length": 682.0, "completions/mean_length": 211.09765625, "completions/mean_terminated_length": 211.92550659179688, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.15146666666666667, "grad_norm": 0.0058931345120072365, "learning_rate": 1.6111111111111113e-06, "loss": 0.0805, "num_tokens": 30041259.0, "reward": 0.9965915679931641, "reward_std": 0.012145813554525375, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8369331955909729, "rewards/format_reward_step": 0.99609375, "step": 142 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8862555511295795, "aux_distill/mean_u": 0.3019375839277697, "aux_distill/n_active_tok": 156.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5765437392795882, "calib/avg_num_step_conf": 4.96484375, "calib/ece": 0.1699453125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001512006861063465, "calib/mean_conf": 0.0019296875, "calib/mu_c": 0.003181818181818182, "calib/mu_w": 0.0016698113207547168, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004032453490412971, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31093750000000003, "calib/step_q_c_n": 192.0, "calib/step_q_gap": 0.005059835495829501, "calib/step_q_w": 0.30587766450417053, "calib/step_q_w_n": 1079.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 719.0, "completions/max_terminated_length": 719.0, "completions/mean_length": 215.78515625, "completions/mean_terminated_length": 216.63137817382812, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.15253333333333333, "grad_norm": 0.00680437870323658, "learning_rate": 1.5833333333333333e-06, "loss": 0.0785, "num_tokens": 30227644.0, "reward": 0.9966306686401367, "reward_std": 0.01232366356998682, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8252924680709839, "rewards/format_reward_step": 0.99609375, "step": 143 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8676097318530083, "aux_distill/mean_u": 0.2938534331536071, "aux_distill/n_active_tok": 159.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4773992793973141, "calib/avg_num_step_conf": 5.015625, "calib/ece": 0.1660546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00034392400917130717, "calib/mean_conf": 0.0019140625, "calib/mu_c": 0.0016279069767441861, "calib/mu_w": 0.0019718309859154933, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004221461802041296, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32804123711340205, "calib/step_q_c_n": 194.0, "calib/step_q_gap": 0.02995866830606264, "calib/step_q_w": 0.2980825688073394, "calib/step_q_w_n": 1090.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 560.0, "completions/max_terminated_length": 560.0, "completions/mean_length": 206.1015625, "completions/mean_terminated_length": 206.90982055664062, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.1536, "grad_norm": 0.004363170359283686, "learning_rate": 1.5555555555555558e-06, "loss": 0.0915, "num_tokens": 30408342.0, "reward": 1.00026273727417, "reward_std": 0.0006111764814704657, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8325566053390503, "rewards/format_reward_step": 1.0, "step": 144 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.871504008769989, "aux_distill/mean_u": 0.30711970518687853, "aux_distill/n_active_tok": 161.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5699991183990125, "calib/avg_num_step_conf": 5.12109375, "calib/ece": 0.21972265625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014169972670369391, "calib/mean_conf": 0.0029335937500000004, "calib/mu_c": 0.004035087719298246, "calib/mu_w": 0.0026180904522613065, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005038085842853508, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3683471074380165, "calib/step_q_c_n": 242.0, "calib/step_q_gap": 0.05525169116112222, "calib/step_q_w": 0.3130954162768943, "calib/step_q_w_n": 1069.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 739.0, "completions/max_terminated_length": 739.0, "completions/mean_length": 208.78515625, "completions/mean_terminated_length": 209.6039276123047, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.15466666666666667, "grad_norm": 0.006876181345432997, "learning_rate": 1.527777777777778e-06, "loss": 0.0934, "num_tokens": 30588303.0, "reward": 1.0008814334869385, "reward_std": 0.0017051099566742778, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.779106616973877, "rewards/format_reward_step": 1.0, "step": 145 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9115830920636654, "aux_distill/mean_u": 0.3254450969587543, "aux_distill/n_active_tok": 171.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5464257964257965, "calib/avg_num_step_conf": 5.3828125, "calib/ece": 0.08310546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008799533799533797, "calib/mean_conf": 0.00283203125, "calib/mu_c": 0.0036363636363636364, "calib/mu_w": 0.0027564102564102567, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004744971575154422, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31356435643564357, "calib/step_q_c_n": 101.0, "calib/step_q_gap": -0.028725228529117575, "calib/step_q_w": 0.34228958496476114, "calib/step_q_w_n": 1277.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 685.0, "completions/max_terminated_length": 685.0, "completions/mean_length": 219.73828125, "completions/mean_terminated_length": 220.60000610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.15573333333333333, "grad_norm": 0.0055837687104940414, "learning_rate": 1.5e-06, "loss": 0.0813, "num_tokens": 30775580.0, "reward": 1.0002973079681396, "reward_std": 0.0008225127821788192, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.914656937122345, "rewards/format_reward_step": 1.0, "step": 146 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8444254854694009, "aux_distill/mean_u": 0.2973344318918232, "aux_distill/n_active_tok": 157.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5471390632544664, "calib/avg_num_step_conf": 4.9453125, "calib/ece": 0.14556640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012638821825205226, "calib/mean_conf": 0.0028710937500000004, "calib/mu_c": 0.003947368421052633, "calib/mu_w": 0.00268348623853211, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00488730262299266, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.34029761904761907, "calib/step_q_c_n": 168.0, "calib/step_q_gap": 0.017043520686963354, "calib/step_q_w": 0.3232540983606557, "calib/step_q_w_n": 1098.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 441.0, "completions/max_terminated_length": 441.0, "completions/mean_length": 202.6171875, "completions/mean_terminated_length": 203.41177368164062, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.1568, "grad_norm": 0.006587905343621969, "learning_rate": 1.4722222222222225e-06, "loss": 0.0974, "num_tokens": 30954938.0, "reward": 1.0005698204040527, "reward_std": 0.0012822604039683938, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8527022004127502, "rewards/format_reward_step": 1.0, "step": 147 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.865119538269937, "aux_distill/mean_u": 0.32240494753424903, "aux_distill/n_active_tok": 151.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5549730224982184, "calib/avg_num_step_conf": 4.80078125, "calib/ece": 0.18, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013315687671790694, "calib/mean_conf": 0.00359375, "calib/mu_c": 0.004680851063829788, "calib/mu_w": 0.003349282296650718, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005113458803735491, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3392574257425743, "calib/step_q_c_n": 202.0, "calib/step_q_gap": 0.042695595168085465, "calib/step_q_w": 0.2965618305744888, "calib/step_q_w_n": 1027.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 502.0, "completions/max_terminated_length": 502.0, "completions/mean_length": 190.22265625, "completions/mean_terminated_length": 190.96864318847656, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.15786666666666666, "grad_norm": 0.006722502876073122, "learning_rate": 1.4444444444444445e-06, "loss": 0.0788, "num_tokens": 31132555.0, "reward": 1.0008398294448853, "reward_std": 0.001604650286026299, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8180859684944153, "rewards/format_reward_step": 1.0, "step": 148 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8780522029846907, "aux_distill/mean_u": 0.2941598656144137, "aux_distill/n_active_tok": 151.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5526471458148476, "calib/avg_num_step_conf": 4.78515625, "calib/ece": 0.1875390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007699891550823237, "calib/mean_conf": 0.0038671875, "calib/mu_c": 0.004489795918367348, "calib/mu_w": 0.003719806763285024, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005752541685189579, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.32889830508474577, "calib/step_q_c_n": 236.0, "calib/step_q_gap": 0.0374008328906103, "calib/step_q_w": 0.29149747219413547, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 625.0, "completions/max_terminated_length": 625.0, "completions/mean_length": 202.828125, "completions/mean_terminated_length": 203.62353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.15893333333333334, "grad_norm": 0.0074161868542432785, "learning_rate": 1.4166666666666667e-06, "loss": 0.0712, "num_tokens": 31312743.0, "reward": 0.9969292879104614, "reward_std": 0.012576460838317871, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8063585758209229, "rewards/format_reward_step": 0.99609375, "step": 149 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9232179820537567, "aux_distill/mean_u": 0.33005514047016793, "aux_distill/n_active_tok": 138.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5525841346153846, "calib/avg_num_step_conf": 4.453125, "calib/ece": 0.183, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011282051282051281, "calib/mean_conf": 0.0045000000000000005, "calib/mu_c": 0.005416666666666667, "calib/mu_w": 0.004288461538461539, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005700877125495689, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.27097142857142853, "calib/step_q_c_n": 175.0, "calib/step_q_gap": -0.00786691339748341, "calib/step_q_w": 0.27883834196891194, "calib/step_q_w_n": 965.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 475.0, "completions/max_terminated_length": 475.0, "completions/mean_length": 178.2421875, "completions/mean_terminated_length": 178.94119262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.16, "grad_norm": 0.007917214184999466, "learning_rate": 1.3888888888888892e-06, "loss": 0.1157, "num_tokens": 31487141.0, "reward": 0.9970831871032715, "reward_std": 0.012835890986025333, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.810572624206543, "rewards/format_reward_step": 0.99609375, "step": 150 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9045738726854324, "aux_distill/mean_u": 0.34284381420736626, "aux_distill/n_active_tok": 151.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5800421274354923, "calib/avg_num_step_conf": 4.7421875, "calib/ece": 0.17123046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002027909426013692, "calib/mean_conf": 0.00455078125, "calib/mu_c": 0.006222222222222223, "calib/mu_w": 0.004194312796208531, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0056330982828855765, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24280701754385964, "calib/step_q_c_n": 228.0, "calib/step_q_gap": -0.052558094018006485, "calib/step_q_w": 0.2953651115618661, "calib/step_q_w_n": 986.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 484.0, "completions/max_terminated_length": 484.0, "completions/mean_length": 197.1328125, "completions/mean_terminated_length": 197.90589904785156, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.16106666666666666, "grad_norm": 0.0069051096215844154, "learning_rate": 1.3611111111111112e-06, "loss": 0.0868, "num_tokens": 31668439.0, "reward": 1.0010675191879272, "reward_std": 0.0018596667796373367, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8263537883758545, "rewards/format_reward_step": 1.0, "step": 151 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8891061600297689, "aux_distill/mean_u": 0.2850408530933203, "aux_distill/n_active_tok": 145.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5039297658862876, "calib/avg_num_step_conf": 4.55078125, "calib/ece": 0.0962109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 3.678929765886345e-05, "calib/mean_conf": 0.0053515625, "calib/mu_c": 0.005384615384615385, "calib/mu_w": 0.005347826086956522, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005852469889593089, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2187155963302752, "calib/step_q_c_n": 109.0, "calib/step_q_gap": -0.051278721851542974, "calib/step_q_w": 0.2699943181818182, "calib/step_q_w_n": 1056.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 458.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 196.66796875, "completions/mean_terminated_length": 197.43922424316406, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.16213333333333332, "grad_norm": 0.0065941475331783295, "learning_rate": 1.3333333333333334e-06, "loss": 0.0724, "num_tokens": 31847986.0, "reward": 1.0005154609680176, "reward_std": 0.0011244446504861116, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8994683623313904, "rewards/format_reward_step": 1.0, "step": 152 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9007142148911953, "aux_distill/mean_u": 0.36622527358126683, "aux_distill/n_active_tok": 142.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.41713121783876494, "calib/avg_num_step_conf": 4.5546875, "calib/ece": 0.1669921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017795883361921105, "calib/mean_conf": 0.0048828125, "calib/mu_c": 0.003409090909090909, "calib/mu_w": 0.0051886792452830195, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005658402344287983, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26424731182795697, "calib/step_q_c_n": 186.0, "calib/step_q_gap": -0.027946565723063477, "calib/step_q_w": 0.29219387755102044, "calib/step_q_w_n": 980.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 516.0, "completions/max_terminated_length": 516.0, "completions/mean_length": 193.578125, "completions/mean_terminated_length": 194.33726501464844, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.1632, "grad_norm": 0.007551599759608507, "learning_rate": 1.3055555555555556e-06, "loss": 0.0752, "num_tokens": 32028670.0, "reward": 1.0005578994750977, "reward_std": 0.0011114366352558136, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8292410373687744, "rewards/format_reward_step": 1.0, "step": 153 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8881961200386286, "aux_distill/mean_u": 0.3052273858621972, "aux_distill/n_active_tok": 140.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5098214285714286, "calib/avg_num_step_conf": 4.390625, "calib/ece": 0.21355078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00020214285714285733, "calib/mean_conf": 0.00519921875, "calib/mu_c": 0.005357142857142858, "calib/mu_w": 0.005155000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005728505969242629, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32344000000000006, "calib/step_q_c_n": 250.0, "calib/step_q_gap": 0.01546334096109847, "calib/step_q_w": 0.3079766590389016, "calib/step_q_w_n": 874.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 490.0, "completions/max_terminated_length": 490.0, "completions/mean_length": 187.39453125, "completions/mean_terminated_length": 188.12942504882812, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.16426666666666667, "grad_norm": 0.008079719729721546, "learning_rate": 1.2777777777777779e-06, "loss": 0.089, "num_tokens": 32204891.0, "reward": 1.0011417865753174, "reward_std": 0.0018572057597339153, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.7835339307785034, "rewards/format_reward_step": 1.0, "step": 154 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8976440113037825, "aux_distill/mean_u": 0.2993254863342828, "aux_distill/n_active_tok": 132.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5273582405935346, "calib/avg_num_step_conf": 4.16015625, "calib/ece": 0.1275390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003630100688924217, "calib/mean_conf": 0.0052734374999999995, "calib/mu_c": 0.005588235294117647, "calib/mu_w": 0.005225225225225226, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006244015689729948, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2894736842105263, "calib/step_q_c_n": 152.0, "calib/step_q_gap": 0.011381679829365332, "calib/step_q_w": 0.278092004381161, "calib/step_q_w_n": 913.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 438.0, "completions/max_terminated_length": 438.0, "completions/mean_length": 176.953125, "completions/mean_terminated_length": 177.64706420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.16533333333333333, "grad_norm": 0.008248928934335709, "learning_rate": 1.25e-06, "loss": 0.0824, "num_tokens": 32381215.0, "reward": 1.000708818435669, "reward_std": 0.0015589774120599031, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8686050176620483, "rewards/format_reward_step": 1.0, "step": 155 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.890786474570632, "aux_distill/mean_u": 0.2993873599101748, "aux_distill/n_active_tok": 143.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.42251082251082245, "calib/avg_num_step_conf": 4.48046875, "calib/ece": 0.0920703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017575757575757581, "calib/mean_conf": 0.0055859375, "calib/mu_c": 0.004, "calib/mu_w": 0.005757575757575758, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005559377415331123, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29452991452991456, "calib/step_q_c_n": 117.0, "calib/step_q_gap": 0.0008988465687494451, "calib/step_q_w": 0.2936310679611651, "calib/step_q_w_n": 1030.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 457.0, "completions/max_terminated_length": 457.0, "completions/mean_length": 185.2421875, "completions/mean_terminated_length": 185.96864318847656, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.1664, "grad_norm": 0.0061567374505102634, "learning_rate": 1.2222222222222223e-06, "loss": 0.1028, "num_tokens": 32557205.0, "reward": 1.0003595352172852, "reward_std": 0.0009770547039806843, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.9030628800392151, "rewards/format_reward_step": 1.0, "step": 156 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8980565015226603, "aux_distill/mean_u": 0.3480745046670132, "aux_distill/n_active_tok": 137.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5370686848958333, "calib/avg_num_step_conf": 4.328125, "calib/ece": 0.24380859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009114583333333344, "calib/mean_conf": 0.00619140625, "calib/mu_c": 0.006875000000000001, "calib/mu_w": 0.0059635416666666665, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005728090423296488, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.260632911392405, "calib/step_q_c_n": 237.0, "calib/step_q_gap": -0.022214390559374575, "calib/step_q_w": 0.2828473019517796, "calib/step_q_w_n": 871.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 570.0, "completions/max_terminated_length": 570.0, "completions/mean_length": 177.19140625, "completions/mean_terminated_length": 177.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.16746666666666668, "grad_norm": 0.007363366428762674, "learning_rate": 1.1944444444444446e-06, "loss": 0.0966, "num_tokens": 32730102.0, "reward": 0.9977768659591675, "reward_std": 0.013398206792771816, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7494601011276245, "rewards/format_reward_step": 0.99609375, "step": 157 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9132845029234886, "aux_distill/mean_u": 0.2984792581929277, "aux_distill/n_active_tok": 133.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4822831489498156, "calib/avg_num_step_conf": 4.21484375, "calib/ece": 0.2521568627450981, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004088504088504089, "calib/mean_conf": 0.006666666666666667, "calib/mu_c": 0.006363636363636364, "calib/mu_w": 0.006772486772486773, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00596284793999944, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3271505791505792, "calib/step_q_c_n": 259.0, "calib/step_q_gap": -0.0033006403616159252, "calib/step_q_w": 0.33045121951219514, "calib/step_q_w_n": 820.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 483.0, "completions/max_terminated_length": 483.0, "completions/mean_length": 173.625, "completions/mean_terminated_length": 174.30589294433594, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.16853333333333334, "grad_norm": 0.00829799473285675, "learning_rate": 1.1666666666666668e-06, "loss": 0.0762, "num_tokens": 32903598.0, "reward": 0.9976944923400879, "reward_std": 0.013237364590168, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7414828538894653, "rewards/format_reward_step": 0.99609375, "step": 158 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8752852268517017, "aux_distill/mean_u": 0.3136494702197504, "aux_distill/n_active_tok": 139.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4699352911896466, "calib/avg_num_step_conf": 4.34765625, "calib/ece": 0.18618110236220473, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007555998008959685, "calib/mean_conf": 0.006732283464566929, "calib/mu_c": 0.006122448979591837, "calib/mu_w": 0.006878048780487806, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006207626524627117, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3257842105263158, "calib/step_q_c_n": 190.0, "calib/step_q_gap": 0.0023574499629355206, "calib/step_q_w": 0.32342676056338027, "calib/step_q_w_n": 923.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2594.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 184.484375, "completions/mean_terminated_length": 184.484375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.1696, "grad_norm": 0.008591427467763424, "learning_rate": 1.138888888888889e-06, "loss": 0.1353, "num_tokens": 33079418.0, "reward": 0.993317723274231, "reward_std": 0.024074668064713478, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8030418157577515, "rewards/format_reward_step": 0.9921875, "step": 159 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8801496736705303, "aux_distill/mean_u": 0.31119047924721255, "aux_distill/n_active_tok": 137.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4530465949820789, "calib/avg_num_step_conf": 4.30859375, "calib/ece": 0.113671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011039426523297485, "calib/mean_conf": 0.007421875, "calib/mu_c": 0.0064516129032258064, "calib/mu_w": 0.007555555555555555, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006026775380282146, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30671641791044774, "calib/step_q_c_n": 134.0, "calib/step_q_gap": -0.014521972182431564, "calib/step_q_w": 0.3212383900928793, "calib/step_q_w_n": 969.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 557.0, "completions/max_terminated_length": 557.0, "completions/mean_length": 171.9765625, "completions/mean_terminated_length": 172.65098571777344, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.17066666666666666, "grad_norm": 0.007910117506980896, "learning_rate": 1.111111111111111e-06, "loss": 0.0578, "num_tokens": 33252092.0, "reward": 0.9968292713165283, "reward_std": 0.012476676143705845, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8764711022377014, "rewards/format_reward_step": 0.99609375, "step": 160 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8750440068542957, "aux_distill/mean_u": 0.29593528663277124, "aux_distill/n_active_tok": 130.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.53564453125, "calib/avg_num_step_conf": 4.17578125, "calib/ece": 0.2423984375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000697916666666667, "calib/mean_conf": 0.0076015625, "calib/mu_c": 0.008125, "calib/mu_w": 0.007427083333333333, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008348689870787735, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29059090909090907, "calib/step_q_c_n": 220.0, "calib/step_q_gap": -0.03642558089731235, "calib/step_q_w": 0.3270164899882214, "calib/step_q_w_n": 849.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 451.0, "completions/max_terminated_length": 451.0, "completions/mean_length": 165.85546875, "completions/mean_terminated_length": 166.50588989257812, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.17173333333333332, "grad_norm": 0.009085629135370255, "learning_rate": 1.0833333333333335e-06, "loss": 0.1112, "num_tokens": 33422279.0, "reward": 1.001967430114746, "reward_std": 0.002947971224784851, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7539349794387817, "rewards/format_reward_step": 1.0, "step": 161 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8783328235149384, "aux_distill/mean_u": 0.2781622598684642, "aux_distill/n_active_tok": 132.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5306853815141108, "calib/avg_num_step_conf": 4.14453125, "calib/ece": 0.28325490196078434, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006921009407197254, "calib/mean_conf": 0.006941176470588235, "calib/mu_c": 0.007432432432432433, "calib/mu_w": 0.0067403314917127075, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005744261466227626, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.346728187919463, "calib/step_q_c_n": 298.0, "calib/step_q_gap": 0.05592871216585882, "calib/step_q_w": 0.2907994757536042, "calib/step_q_w_n": 763.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1928.0, "completions/max_terminated_length": 1928.0, "completions/mean_length": 179.453125, "completions/mean_terminated_length": 179.453125, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.1728, "grad_norm": 0.009630361571907997, "learning_rate": 1.0555555555555557e-06, "loss": 0.1369, "num_tokens": 33596171.0, "reward": 0.9982017278671265, "reward_std": 0.014269420877099037, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7112472653388977, "rewards/format_reward_step": 0.99609375, "step": 162 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8860432486981153, "aux_distill/mean_u": 0.33796958895590445, "aux_distill/n_active_tok": 136.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49386574074074074, "calib/avg_num_step_conf": 4.36328125, "calib/ece": 0.1492578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002870370370370353, "calib/mean_conf": 0.0069921875, "calib/mu_c": 0.006750000000000001, "calib/mu_w": 0.007037037037037036, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006182126573020302, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2718055555555556, "calib/step_q_c_n": 144.0, "calib/step_q_gap": -0.03968468082676718, "calib/step_q_w": 0.31149023638232276, "calib/step_q_w_n": 973.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 170.16796875, "completions/mean_terminated_length": 170.83529663085938, "completions/min_length": 0.0, "completions/min_terminated_length": 58.0, "epoch": 0.17386666666666667, "grad_norm": 0.007554793730378151, "learning_rate": 1.0277777777777777e-06, "loss": 0.0762, "num_tokens": 33768374.0, "reward": 1.0010111331939697, "reward_std": 0.0017468599835410714, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8457722663879395, "rewards/format_reward_step": 1.0, "step": 163 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8970063086599112, "aux_distill/mean_u": 0.29121355008947214, "aux_distill/n_active_tok": 142.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5376078132927448, "calib/avg_num_step_conf": 4.46484375, "calib/ece": 0.13416862745098038, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005730593607305946, "calib/mean_conf": 0.0070078431372549025, "calib/mu_c": 0.007500000000000001, "calib/mu_w": 0.006926940639269406, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005982321771629778, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2468181818181818, "calib/step_q_c_n": 154.0, "calib/step_q_gap": -0.08941033183196986, "calib/step_q_w": 0.33622851365015166, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 620.0, "completions/max_terminated_length": 620.0, "completions/mean_length": 179.66796875, "completions/mean_terminated_length": 180.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.17493333333333333, "grad_norm": 0.009981459937989712, "learning_rate": 1.0000000000000002e-06, "loss": 0.0684, "num_tokens": 33944313.0, "reward": 0.9971061944961548, "reward_std": 0.012966196052730083, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8574935793876648, "rewards/format_reward_step": 0.99609375, "step": 164 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9019099716097116, "aux_distill/mean_u": 0.33034416956193996, "aux_distill/n_active_tok": 141.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5552083333333333, "calib/avg_num_step_conf": 4.4375, "calib/ece": 0.1494140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010833333333333337, "calib/mean_conf": 0.006835937499999999, "calib/mu_c": 0.00775, "calib/mu_w": 0.006666666666666666, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005497552500530919, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2841301775147929, "calib/step_q_c_n": 169.0, "calib/step_q_gap": -0.03139205619772001, "calib/step_q_w": 0.3155222337125129, "calib/step_q_w_n": 967.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 477.0, "completions/max_terminated_length": 477.0, "completions/mean_length": 178.21484375, "completions/mean_terminated_length": 178.9137420654297, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.176, "grad_norm": 0.008244171738624573, "learning_rate": 9.722222222222224e-07, "loss": 0.0962, "num_tokens": 34119320.0, "reward": 1.0011725425720215, "reward_std": 0.0019493226427584887, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8460949063301086, "rewards/format_reward_step": 1.0, "step": 165 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9130820780992508, "aux_distill/mean_u": 0.34838786578038355, "aux_distill/n_active_tok": 139.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4449762989125383, "calib/avg_num_step_conf": 4.36328125, "calib/ece": 0.19944140625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001243517055488426, "calib/mean_conf": 0.00758984375, "calib/mu_c": 0.006603773584905662, "calib/mu_w": 0.007847290640394088, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005769644971797306, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2882242990654206, "calib/step_q_c_n": 214.0, "calib/step_q_gap": -0.02515333105639561, "calib/step_q_w": 0.3133776301218162, "calib/step_q_w_n": 903.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 431.0, "completions/max_terminated_length": 431.0, "completions/mean_length": 177.10546875, "completions/mean_terminated_length": 177.8000030517578, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.17706666666666668, "grad_norm": 0.00669672479853034, "learning_rate": 9.444444444444445e-07, "loss": 0.0847, "num_tokens": 34294651.0, "reward": 1.001321792602539, "reward_std": 0.0016973735764622688, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7956122159957886, "rewards/format_reward_step": 1.0, "step": 166 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9007718153297901, "aux_distill/mean_u": 0.34749688302032433, "aux_distill/n_active_tok": 137.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5268932038834953, "calib/avg_num_step_conf": 4.3125, "calib/ece": 0.188359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008038834951456311, "calib/mean_conf": 0.006953124999999999, "calib/mu_c": 0.0076, "calib/mu_w": 0.006796116504854369, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006002420572933473, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2773591086828539, "calib/step_q_c_n": 213.0, "calib/step_q_gap": 0.005353497010575581, "calib/step_q_w": 0.2720056116722783, "calib/step_q_w_n": 891.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 652.0, "completions/max_terminated_length": 652.0, "completions/mean_length": 175.8671875, "completions/mean_terminated_length": 176.55686950683594, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.17813333333333334, "grad_norm": 0.01054567750543356, "learning_rate": 9.166666666666666e-07, "loss": 0.0817, "num_tokens": 34469089.0, "reward": 0.9936298727989197, "reward_std": 0.024928880855441093, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.7997597455978394, "rewards/format_reward_step": 0.9921875, "step": 167 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9209919981658459, "aux_distill/mean_u": 0.3350003984083795, "aux_distill/n_active_tok": 143.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4944485566247224, "calib/avg_num_step_conf": 4.49609375, "calib/ece": 0.23817968750000001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 2.9772185212599556e-05, "calib/mean_conf": 0.0079140625, "calib/mu_c": 0.007936507936507936, "calib/mu_w": 0.007906735751295337, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005863722345583371, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3273705179282868, "calib/step_q_c_n": 251.0, "calib/step_q_gap": 0.016279406817175712, "calib/step_q_w": 0.3110911111111111, "calib/step_q_w_n": 900.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 477.0, "completions/max_terminated_length": 477.0, "completions/mean_length": 180.86328125, "completions/mean_terminated_length": 181.5725555419922, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.1792, "grad_norm": 0.0089962063357234, "learning_rate": 8.88888888888889e-07, "loss": 0.1004, "num_tokens": 34643870.0, "reward": 1.0019044876098633, "reward_std": 0.003158072242513299, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7577154636383057, "rewards/format_reward_step": 1.0, "step": 168 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8786559756845236, "aux_distill/mean_u": 0.2959150388126449, "aux_distill/n_active_tok": 151.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5684840425531914, "calib/avg_num_step_conf": 4.7578125, "calib/ece": 0.1767372549019608, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001719108019639935, "calib/mean_conf": 0.007576470588235294, "calib/mu_c": 0.00897872340425532, "calib/mu_w": 0.007259615384615384, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006041120147894082, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31331683168316826, "calib/step_q_c_n": 202.0, "calib/step_q_gap": 0.01460620176190841, "calib/step_q_w": 0.29871062992125985, "calib/step_q_w_n": 1016.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 568.0, "completions/max_terminated_length": 568.0, "completions/mean_length": 186.21484375, "completions/mean_terminated_length": 186.9451141357422, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.18026666666666666, "grad_norm": 0.009285873733460903, "learning_rate": 8.611111111111112e-07, "loss": 0.0847, "num_tokens": 34819533.0, "reward": 0.9996484518051147, "reward_std": 0.00795112457126379, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.815703272819519, "rewards/format_reward_step": 0.99609375, "step": 169 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8939372319728136, "aux_distill/mean_u": 0.3145104684178751, "aux_distill/n_active_tok": 150.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49501424501424496, "calib/avg_num_step_conf": 4.7109375, "calib/ece": 0.14479607843137257, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00023183760683760483, "calib/mean_conf": 0.008145098039215686, "calib/mu_c": 0.00794871794871795, "calib/mu_w": 0.008180555555555555, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005175974426607817, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28104651162790695, "calib/step_q_c_n": 172.0, "calib/step_q_gap": -0.02617108991948186, "calib/step_q_w": 0.3072176015473888, "calib/step_q_w_n": 1034.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 509.0, "completions/max_terminated_length": 509.0, "completions/mean_length": 182.0546875, "completions/mean_terminated_length": 182.7686309814453, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.18133333333333335, "grad_norm": 0.007290273439139128, "learning_rate": 8.333333333333333e-07, "loss": 0.1107, "num_tokens": 34994099.0, "reward": 0.9972583055496216, "reward_std": 0.01242972444742918, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8460791110992432, "rewards/format_reward_step": 0.99609375, "step": 170 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8823307789862156, "aux_distill/mean_u": 0.3094804324683254, "aux_distill/n_active_tok": 151.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47939958592132503, "calib/avg_num_step_conf": 4.78125, "calib/ece": 0.1715625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006293995859213237, "calib/mean_conf": 0.008125, "calib/mu_c": 0.007608695652173915, "calib/mu_w": 0.008238095238095239, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005962120008855911, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2806111111111111, "calib/step_q_c_n": 180.0, "calib/step_q_gap": -0.018162835249042164, "calib/step_q_w": 0.29877394636015325, "calib/step_q_w_n": 1044.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 572.0, "completions/max_terminated_length": 572.0, "completions/mean_length": 190.0859375, "completions/mean_terminated_length": 190.83139038085938, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.1824, "grad_norm": 0.008502909913659096, "learning_rate": 8.055555555555557e-07, "loss": 0.0798, "num_tokens": 35173465.0, "reward": 1.0013163089752197, "reward_std": 0.0020688537042587996, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8229453563690186, "rewards/format_reward_step": 1.0, "step": 171 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8792869746685028, "aux_distill/mean_u": 0.25173040070577396, "aux_distill/n_active_tok": 145.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5367481980385206, "calib/avg_num_step_conf": 4.546875, "calib/ece": 0.143828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008436724565756837, "calib/mean_conf": 0.008515624999999999, "calib/mu_c": 0.009230769230769232, "calib/mu_w": 0.008387096774193548, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005316519619015339, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33072847682119205, "calib/step_q_c_n": 151.0, "calib/step_q_gap": -0.005166192477919496, "calib/step_q_w": 0.33589466929911155, "calib/step_q_w_n": 1013.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 525.0, "completions/max_terminated_length": 525.0, "completions/mean_length": 179.9140625, "completions/mean_terminated_length": 180.61961364746094, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.18346666666666667, "grad_norm": 0.008968736976385117, "learning_rate": 7.777777777777779e-07, "loss": 0.0966, "num_tokens": 35346683.0, "reward": 1.0013558864593506, "reward_std": 0.0021096221171319485, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8503679633140564, "rewards/format_reward_step": 1.0, "step": 172 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8806129209697247, "aux_distill/mean_u": 0.27734205629014846, "aux_distill/n_active_tok": 143.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5025084299695698, "calib/avg_num_step_conf": 4.51171875, "calib/ece": 0.2380859375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0001159634838391304, "calib/mean_conf": 0.0080078125, "calib/mu_c": 0.008095238095238095, "calib/mu_w": 0.007979274611398964, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005688524761732496, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32799242424242425, "calib/step_q_c_n": 264.0, "calib/step_q_gap": 0.045119248035914716, "calib/step_q_w": 0.28287317620650954, "calib/step_q_w_n": 891.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 571.0, "completions/max_terminated_length": 571.0, "completions/mean_length": 183.71875, "completions/mean_terminated_length": 184.43922424316406, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.18453333333333333, "grad_norm": 0.009307953529059887, "learning_rate": 7.5e-07, "loss": 0.067, "num_tokens": 35520683.0, "reward": 1.001943826675415, "reward_std": 0.0026724967174232006, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7577941417694092, "rewards/format_reward_step": 1.0, "step": 173 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8576763272285461, "aux_distill/mean_u": 0.3163159836046228, "aux_distill/n_active_tok": 157.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46376619941814334, "calib/avg_num_step_conf": 4.94921875, "calib/ece": 0.2145703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009230362338005817, "calib/mean_conf": 0.0080859375, "calib/mu_c": 0.00736842105263158, "calib/mu_w": 0.008291457286432161, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005576915791554841, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2866023166023166, "calib/step_q_c_n": 259.0, "calib/step_q_gap": 0.0005989435864436343, "calib/step_q_w": 0.286003373015873, "calib/step_q_w_n": 1008.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 560.0, "completions/max_terminated_length": 560.0, "completions/mean_length": 201.10546875, "completions/mean_terminated_length": 201.89413452148438, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.1856, "grad_norm": 0.008429630659520626, "learning_rate": 7.222222222222222e-07, "loss": 0.0903, "num_tokens": 35700206.0, "reward": 1.0015923976898193, "reward_std": 0.0024176673032343388, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7805285453796387, "rewards/format_reward_step": 1.0, "step": 174 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8765918333083391, "aux_distill/mean_u": 0.2949703924609209, "aux_distill/n_active_tok": 153.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5479222231781813, "calib/avg_num_step_conf": 4.8984375, "calib/ece": 0.22276171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000997418910780352, "calib/mean_conf": 0.00770703125, "calib/mu_c": 0.008474576271186442, "calib/mu_w": 0.00747715736040609, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005341109487880157, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.356, "calib/step_q_c_n": 255.0, "calib/step_q_gap": 0.0870982982982983, "calib/step_q_w": 0.2689017017017017, "calib/step_q_w_n": 999.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 723.0, "completions/max_terminated_length": 723.0, "completions/mean_length": 192.16015625, "completions/mean_terminated_length": 192.9137420654297, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.18666666666666668, "grad_norm": 0.009075365960597992, "learning_rate": 6.944444444444446e-07, "loss": 0.0763, "num_tokens": 35879031.0, "reward": 1.0019090175628662, "reward_std": 0.0027942899614572525, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7733495831489563, "rewards/format_reward_step": 1.0, "step": 175 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9150636978447437, "aux_distill/mean_u": 0.35805016530399253, "aux_distill/n_active_tok": 153.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47607655502392343, "calib/avg_num_step_conf": 4.80078125, "calib/ece": 0.17532421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00048651124910923174, "calib/mean_conf": 0.00826953125, "calib/mu_c": 0.007872340425531916, "calib/mu_w": 0.008358851674641147, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0055840181908078925, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24904255319148935, "calib/step_q_c_n": 188.0, "calib/step_q_gap": -0.0616162364338709, "calib/step_q_w": 0.31065878962536025, "calib/step_q_w_n": 1041.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 471.0, "completions/max_terminated_length": 471.0, "completions/mean_length": 188.4453125, "completions/mean_terminated_length": 189.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.18773333333333334, "grad_norm": 0.008459899574518204, "learning_rate": 6.666666666666667e-07, "loss": 0.094, "num_tokens": 36055145.0, "reward": 1.0013954639434814, "reward_std": 0.002379113342612982, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8191972970962524, "rewards/format_reward_step": 1.0, "step": 176 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8599807396531105, "aux_distill/mean_u": 0.2972779997420456, "aux_distill/n_active_tok": 154.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48812477844735913, "calib/avg_num_step_conf": 4.84375, "calib/ece": 0.1441796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 4.844617747843706e-05, "calib/mean_conf": 0.0081640625, "calib/mu_c": 0.008205128205128207, "calib/mu_w": 0.00815668202764977, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005603120424914474, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3767272727272728, "calib/step_q_c_n": 165.0, "calib/step_q_gap": 0.09225285412262157, "calib/step_q_w": 0.2844744186046512, "calib/step_q_w_n": 1075.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 554.0, "completions/max_terminated_length": 554.0, "completions/mean_length": 193.140625, "completions/mean_terminated_length": 193.89805603027344, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.1888, "grad_norm": 0.007624008227139711, "learning_rate": 6.388888888888889e-07, "loss": 0.1008, "num_tokens": 36232229.0, "reward": 1.0012009143829346, "reward_std": 0.0019222039263695478, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8500581979751587, "rewards/format_reward_step": 1.0, "step": 177 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8995106462389231, "aux_distill/mean_u": 0.31550128866836435, "aux_distill/n_active_tok": 151.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5246366536689118, "calib/avg_num_step_conf": 4.8125, "calib/ece": 0.1437890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004950963015479173, "calib/mean_conf": 0.008554687500000002, "calib/mu_c": 0.008974358974358977, "calib/mu_w": 0.00847926267281106, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005140690301636906, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2966787096774194, "calib/step_q_c_n": 155.0, "calib/step_q_gap": 0.010712135861263405, "calib/step_q_w": 0.285966573816156, "calib/step_q_w_n": 1077.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 653.0, "completions/max_terminated_length": 653.0, "completions/mean_length": 189.65234375, "completions/mean_terminated_length": 190.39608764648438, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.18986666666666666, "grad_norm": 0.011668938212096691, "learning_rate": 6.111111111111112e-07, "loss": 0.0498, "num_tokens": 36410660.0, "reward": 1.0013173818588257, "reward_std": 0.002397837582975626, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8502910137176514, "rewards/format_reward_step": 1.0, "step": 178 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8968758918344975, "aux_distill/mean_u": 0.3325099115013923, "aux_distill/n_active_tok": 158.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49503657262277956, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.21822265625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0001401950539881569, "calib/mean_conf": 0.00833984375, "calib/mu_c": 0.008448275862068965, "calib/mu_w": 0.008308080808080808, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006065860406866115, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2133955223880597, "calib/step_q_c_n": 268.0, "calib/step_q_gap": -0.08181862299504439, "calib/step_q_w": 0.2952141453831041, "calib/step_q_w_n": 1018.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 602.0, "completions/max_terminated_length": 602.0, "completions/mean_length": 196.3046875, "completions/mean_terminated_length": 197.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.19093333333333334, "grad_norm": 0.009127902798354626, "learning_rate": 5.833333333333334e-07, "loss": 0.0987, "num_tokens": 36590986.0, "reward": 1.0018608570098877, "reward_std": 0.0030939499847590923, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7771593332290649, "rewards/format_reward_step": 1.0, "step": 179 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.870330423116684, "aux_distill/mean_u": 0.334101875639815, "aux_distill/n_active_tok": 157.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48820831968555517, "calib/avg_num_step_conf": 4.9140625, "calib/ece": 0.159375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00026640462932634584, "calib/mean_conf": 0.00859375, "calib/mu_c": 0.008372093023255815, "calib/mu_w": 0.00863849765258216, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0057600964347396125, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25797927461139897, "calib/step_q_c_n": 193.0, "calib/step_q_gap": -0.04420983336982165, "calib/step_q_w": 0.3021891079812206, "calib/step_q_w_n": 1065.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 576.0, "completions/max_terminated_length": 576.0, "completions/mean_length": 195.99609375, "completions/mean_terminated_length": 196.7647247314453, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.192, "grad_norm": 0.009810417890548706, "learning_rate": 5.555555555555555e-07, "loss": 0.086, "num_tokens": 36768825.0, "reward": 1.0013527870178223, "reward_std": 0.0027455103117972612, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.834736704826355, "rewards/format_reward_step": 1.0, "step": 180 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8936001621186733, "aux_distill/mean_u": 0.2772019469641405, "aux_distill/n_active_tok": 154.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5177832919768404, "calib/avg_num_step_conf": 4.828125, "calib/ece": 0.14393359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00036311000827130013, "calib/mean_conf": 0.00841015625, "calib/mu_c": 0.00871794871794872, "calib/mu_w": 0.008354838709677419, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005229847330523707, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2793037974683544, "calib/step_q_c_n": 158.0, "calib/step_q_gap": 0.001608064629764372, "calib/step_q_w": 0.27769573283859, "calib/step_q_w_n": 1078.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 656.0, "completions/max_terminated_length": 656.0, "completions/mean_length": 191.73046875, "completions/mean_terminated_length": 192.48236083984375, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.19306666666666666, "grad_norm": 0.009542922489345074, "learning_rate": 5.277777777777779e-07, "loss": 0.0903, "num_tokens": 36947980.0, "reward": 1.0012791156768799, "reward_std": 0.0020589884370565414, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.850214421749115, "rewards/format_reward_step": 1.0, "step": 181 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8494912981987, "aux_distill/mean_u": 0.2728873570876729, "aux_distill/n_active_tok": 164.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5364191729323309, "calib/avg_num_step_conf": 5.21875, "calib/ece": 0.1016015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008959899749373442, "calib/mean_conf": 0.007773437499999999, "calib/mu_c": 0.008571428571428572, "calib/mu_w": 0.007675438596491228, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005459674388971722, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3235074626865672, "calib/step_q_c_n": 134.0, "calib/step_q_gap": 0.01208658082300651, "calib/step_q_w": 0.3114208818635607, "calib/step_q_w_n": 1202.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1003.0, "completions/max_terminated_length": 1003.0, "completions/mean_length": 206.20703125, "completions/mean_terminated_length": 207.0157012939453, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.19413333333333332, "grad_norm": 0.008077757433056831, "learning_rate": 5.000000000000001e-07, "loss": 0.0808, "num_tokens": 37130737.0, "reward": 1.0008924007415771, "reward_std": 0.0018270486034452915, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.8924098014831543, "rewards/format_reward_step": 1.0, "step": 182 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8732987251132727, "aux_distill/mean_u": 0.3000003229976155, "aux_distill/n_active_tok": 148.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5549640145913438, "calib/avg_num_step_conf": 4.66796875, "calib/ece": 0.1834765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010460416050478167, "calib/mean_conf": 0.0079296875, "calib/mu_c": 0.008775510204081634, "calib/mu_w": 0.007729468599033817, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005078575701153203, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.35028169014084515, "calib/step_q_c_n": 213.0, "calib/step_q_gap": 0.05462181234043778, "calib/step_q_w": 0.29565987780040737, "calib/step_q_w_n": 982.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 475.0, "completions/max_terminated_length": 475.0, "completions/mean_length": 190.69921875, "completions/mean_terminated_length": 191.4470672607422, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.1952, "grad_norm": 0.009582036174833775, "learning_rate": 4.7222222222222226e-07, "loss": 0.0906, "num_tokens": 37310044.0, "reward": 1.0016353130340576, "reward_std": 0.0023358927574008703, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8118644952774048, "rewards/format_reward_step": 1.0, "step": 183 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9122953005135059, "aux_distill/mean_u": 0.32397631622647044, "aux_distill/n_active_tok": 163.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5520361990950227, "calib/avg_num_step_conf": 5.109375, "calib/ece": 0.194765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010935143288084495, "calib/mean_conf": 0.008359375, "calib/mu_c": 0.009230769230769233, "calib/mu_w": 0.008137254901960784, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005344001273332091, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27924603174603174, "calib/step_q_c_n": 252.0, "calib/step_q_gap": -0.0157066197691198, "calib/step_q_w": 0.29495265151515154, "calib/step_q_w_n": 1056.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 481.0, "completions/max_terminated_length": 481.0, "completions/mean_length": 206.30859375, "completions/mean_terminated_length": 207.11766052246094, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.19626666666666667, "grad_norm": 0.01037457212805748, "learning_rate": 4.444444444444445e-07, "loss": 0.0966, "num_tokens": 37491947.0, "reward": 1.0018258094787598, "reward_std": 0.002241644309833646, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.8005265593528748, "rewards/format_reward_step": 1.0, "step": 184 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8742652013897896, "aux_distill/mean_u": 0.31161982672906746, "aux_distill/n_active_tok": 159.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4833984375, "calib/avg_num_step_conf": 5.02734375, "calib/ece": 0.241640625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003125000000000003, "calib/mean_conf": 0.008359374999999999, "calib/mu_c": 0.008125, "calib/mu_w": 0.0084375, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005344001273332091, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3025913978494624, "calib/step_q_c_n": 279.0, "calib/step_q_gap": -0.0333262608806964, "calib/step_q_w": 0.3359176587301588, "calib/step_q_w_n": 1008.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 613.0, "completions/max_terminated_length": 613.0, "completions/mean_length": 200.53515625, "completions/mean_terminated_length": 201.3215789794922, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.19733333333333333, "grad_norm": 0.008647466078400612, "learning_rate": 4.1666666666666667e-07, "loss": 0.0808, "num_tokens": 37674012.0, "reward": 1.0019819736480713, "reward_std": 0.002680209930986166, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7539640665054321, "rewards/format_reward_step": 1.0, "step": 185 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9008009806275368, "aux_distill/mean_u": 0.32494249858645763, "aux_distill/n_active_tok": 152.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4924249465563714, "calib/avg_num_step_conf": 4.8046875, "calib/ece": 0.197578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -2.4165814666788493e-05, "calib/mean_conf": 0.009453125, "calib/mu_c": 0.009433962264150945, "calib/mu_w": 0.009458128078817733, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0052004497627008185, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.39382064777327935, "calib/step_q_c_n": 247.0, "calib/step_q_gap": 0.08945645652200773, "calib/step_q_w": 0.3043641912512716, "calib/step_q_w_n": 983.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 583.0, "completions/max_terminated_length": 583.0, "completions/mean_length": 197.9296875, "completions/mean_terminated_length": 198.7058868408203, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.1984, "grad_norm": 0.00890508946031332, "learning_rate": 3.8888888888888895e-07, "loss": 0.0811, "num_tokens": 37853530.0, "reward": 1.0018949508666992, "reward_std": 0.0025664858985692263, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7967585325241089, "rewards/format_reward_step": 1.0, "step": 186 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8909507598727942, "aux_distill/mean_u": 0.2999026721666665, "aux_distill/n_active_tok": 150.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5296495956873315, "calib/avg_num_step_conf": 4.72265625, "calib/ece": 0.1974609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000779812250209129, "calib/mean_conf": 0.0095703125, "calib/mu_c": 0.010188679245283019, "calib/mu_w": 0.00940886699507389, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.005097768497327409, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3390128755364807, "calib/step_q_c_n": 233.0, "calib/step_q_gap": 0.033364309962710226, "calib/step_q_w": 0.30564856557377046, "calib/step_q_w_n": 976.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 702.0, "completions/max_terminated_length": 702.0, "completions/mean_length": 188.59375, "completions/mean_terminated_length": 189.33334350585938, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.19946666666666665, "grad_norm": 0.009559610858559608, "learning_rate": 3.611111111111111e-07, "loss": 0.0774, "num_tokens": 38027162.0, "reward": 0.9981445074081421, "reward_std": 0.013470064848661423, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7931640148162842, "rewards/format_reward_step": 0.99609375, "step": 187 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.930905407294631, "aux_distill/mean_u": 0.3330317749775051, "aux_distill/n_active_tok": 147.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5486639676113361, "calib/avg_num_step_conf": 4.62109375, "calib/ece": 0.24642745098039215, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010149797570850206, "calib/mean_conf": 0.00847450980392157, "calib/mu_c": 0.009230769230769232, "calib/mu_w": 0.008215789473684211, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004622020742067258, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2894666666666667, "calib/step_q_c_n": 270.0, "calib/step_q_gap": -0.016119751734209542, "calib/step_q_w": 0.30558641840087625, "calib/step_q_w_n": 913.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 188.40625, "completions/mean_terminated_length": 189.14511108398438, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.20053333333333334, "grad_norm": 0.009920141659677029, "learning_rate": 3.3333333333333335e-07, "loss": 0.1053, "num_tokens": 38203274.0, "reward": 1.000344157218933, "reward_std": 0.008894114755094051, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7467821836471558, "rewards/format_reward_step": 0.99609375, "step": 188 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8790177255868912, "aux_distill/mean_u": 0.318059983412503, "aux_distill/n_active_tok": 168.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47647527910685805, "calib/avg_num_step_conf": 5.28515625, "calib/ece": 0.249671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005569377990430604, "calib/mean_conf": 0.008140625, "calib/mu_c": 0.007727272727272728, "calib/mu_w": 0.008284210526315788, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005203265763861674, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3135640138408305, "calib/step_q_c_n": 289.0, "calib/step_q_gap": 0.0037642018107553743, "calib/step_q_w": 0.3097998120300751, "calib/step_q_w_n": 1064.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 496.0, "completions/max_terminated_length": 496.0, "completions/mean_length": 203.83984375, "completions/mean_terminated_length": 204.63922119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.2016, "grad_norm": 0.009484932757914066, "learning_rate": 3.055555555555556e-07, "loss": 0.0791, "num_tokens": 38387033.0, "reward": 1.0019454956054688, "reward_std": 0.0023256679996848106, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7460785508155823, "rewards/format_reward_step": 1.0, "step": 189 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.842708757147193, "aux_distill/mean_u": 0.3038010499938975, "aux_distill/n_active_tok": 172.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5113366571699904, "calib/avg_num_step_conf": 5.390625, "calib/ece": 0.14368627450980392, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00027421652421652683, "calib/mean_conf": 0.009254901960784314, "calib/mu_c": 0.00948717948717949, "calib/mu_w": 0.009212962962962963, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004584043669396849, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3494915254237288, "calib/step_q_c_n": 177.0, "calib/step_q_gap": 0.018417128083745404, "calib/step_q_w": 0.3310743973399834, "calib/step_q_w_n": 1203.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 575.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 203.2265625, "completions/mean_terminated_length": 204.02354431152344, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.20266666666666666, "grad_norm": 0.008105236105620861, "learning_rate": 2.7777777777777776e-07, "loss": 0.0789, "num_tokens": 38568475.0, "reward": 0.9994391202926636, "reward_std": 0.007705869153141975, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8465343713760376, "rewards/format_reward_step": 0.99609375, "step": 190 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8719021435827017, "aux_distill/mean_u": 0.26051567777230705, "aux_distill/n_active_tok": 152.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5524636653668912, "calib/avg_num_step_conf": 4.7890625, "calib/ece": 0.14328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011059907834101417, "calib/mean_conf": 0.0090625, "calib/mu_c": 0.010000000000000004, "calib/mu_w": 0.008894009216589862, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004227421643271464, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.36428571428571427, "calib/step_q_c_n": 154.0, "calib/step_q_gap": 0.0468883262260128, "calib/step_q_w": 0.31739738805970147, "calib/step_q_w_n": 1072.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 558.0, "completions/max_terminated_length": 558.0, "completions/mean_length": 182.8671875, "completions/mean_terminated_length": 183.58432006835938, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.20373333333333332, "grad_norm": 0.010515516623854637, "learning_rate": 2.5000000000000004e-07, "loss": 0.0907, "num_tokens": 38743265.0, "reward": 1.0014734268188477, "reward_std": 0.0026649045757949352, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8506031036376953, "rewards/format_reward_step": 1.0, "step": 191 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.872589722275734, "aux_distill/mean_u": 0.282911263266772, "aux_distill/n_active_tok": 159.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47913813934756344, "calib/avg_num_step_conf": 5.0859375, "calib/ece": 0.24546875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003801852597664114, "calib/mean_conf": 0.0084375, "calib/mu_c": 0.008153846153846154, "calib/mu_w": 0.008534031413612565, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0050678983563208916, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29670033670033674, "calib/step_q_c_n": 297.0, "calib/step_q_gap": 0.0076754610784461885, "calib/step_q_w": 0.28902487562189055, "calib/step_q_w_n": 1005.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 197.97265625, "completions/mean_terminated_length": 198.74903869628906, "completions/min_length": 0.0, "completions/min_terminated_length": 39.0, "epoch": 0.2048, "grad_norm": 0.011404029093682766, "learning_rate": 2.2222222222222224e-07, "loss": 0.0721, "num_tokens": 38922730.0, "reward": 1.0000687837600708, "reward_std": 0.0084780128672719, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7501375079154968, "rewards/format_reward_step": 0.99609375, "step": 192 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.896713113412261, "aux_distill/mean_u": 0.3166824893453891, "aux_distill/n_active_tok": 165.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48357371794871795, "calib/avg_num_step_conf": 5.2734375, "calib/ece": 0.178671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -9.61538461538465e-05, "calib/mean_conf": 0.008828125, "calib/mu_c": 0.008749999999999999, "calib/mu_w": 0.008846153846153846, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0046972554736116915, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29605691056910577, "calib/step_q_c_n": 246.0, "calib/step_q_gap": 0.010277019264757936, "calib/step_q_w": 0.28577989130434783, "calib/step_q_w_n": 1104.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 585.0, "completions/max_terminated_length": 585.0, "completions/mean_length": 204.1875, "completions/mean_terminated_length": 204.98825073242188, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.20586666666666667, "grad_norm": 0.00794635247439146, "learning_rate": 1.9444444444444447e-07, "loss": 0.1028, "num_tokens": 39104522.0, "reward": 1.001590609550476, "reward_std": 0.0025997646152973175, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8156812191009521, "rewards/format_reward_step": 1.0, "step": 193 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9085062127560377, "aux_distill/mean_u": 0.2996982017706232, "aux_distill/n_active_tok": 144.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5206418184857095, "calib/avg_num_step_conf": 4.51171875, "calib/ece": 0.2343764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005717867290656875, "calib/mean_conf": 0.00876078431372549, "calib/mu_c": 0.009193548387096776, "calib/mu_w": 0.008621761658031088, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0054680561994678014, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2779622641509434, "calib/step_q_c_n": 265.0, "calib/step_q_gap": -0.011389421242315068, "calib/step_q_w": 0.28935168539325845, "calib/step_q_w_n": 890.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2557.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 189.11328125, "completions/mean_terminated_length": 189.11328125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.20693333333333333, "grad_norm": 0.010015045292675495, "learning_rate": 1.6666666666666668e-07, "loss": 0.1306, "num_tokens": 39282687.0, "reward": 0.9982671737670898, "reward_std": 0.014264973811805248, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7582530975341797, "rewards/format_reward_step": 0.99609375, "step": 194 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8857430908828974, "aux_distill/mean_u": 0.3386770414670152, "aux_distill/n_active_tok": 155.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4878806089743589, "calib/avg_num_step_conf": 4.87890625, "calib/ece": 0.17875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 1.734723475976807e-18, "calib/mean_conf": 0.008749999999999999, "calib/mu_c": 0.00875, "calib/mu_w": 0.008749999999999999, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005077524002897475, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.34873777777777776, "calib/step_q_c_n": 225.0, "calib/step_q_gap": 0.04072019965277779, "calib/step_q_w": 0.30801757812499997, "calib/step_q_w_n": 1024.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 464.0, "completions/max_terminated_length": 464.0, "completions/mean_length": 191.8359375, "completions/mean_terminated_length": 192.58824157714844, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.208, "grad_norm": 0.012000245973467827, "learning_rate": 1.3888888888888888e-07, "loss": 0.0915, "num_tokens": 39461589.0, "reward": 1.0015894174575806, "reward_std": 0.0027912305667996407, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8156788945198059, "rewards/format_reward_step": 1.0, "step": 195 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9231633394956589, "aux_distill/mean_u": 0.3158360285345036, "aux_distill/n_active_tok": 145.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5147087378640778, "calib/avg_num_step_conf": 4.56640625, "calib/ece": 0.1861328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002737864077669936, "calib/mean_conf": 0.0091796875, "calib/mu_c": 0.009400000000000002, "calib/mu_w": 0.009126213592233009, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.004297407637441874, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.364247311827957, "calib/step_q_c_n": 186.0, "calib/step_q_gap": 0.05065931589713302, "calib/step_q_w": 0.313587995930824, "calib/step_q_w_n": 983.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 180.30859375, "completions/mean_terminated_length": 181.0157012939453, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.20906666666666668, "grad_norm": 0.009502574801445007, "learning_rate": 1.1111111111111112e-07, "loss": 0.1179, "num_tokens": 39634100.0, "reward": 0.9978784918785095, "reward_std": 0.01335802674293518, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.804350733757019, "rewards/format_reward_step": 0.99609375, "step": 196 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.9447934832423925, "aux_distill/mean_u": 0.3606861734221428, "aux_distill/n_active_tok": 166.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5377046325322188, "calib/avg_num_step_conf": 5.23828125, "calib/ece": 0.2174609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000715778474399166, "calib/mean_conf": 0.0091015625, "calib/mu_c": 0.009655172413793104, "calib/mu_w": 0.008939393939393938, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00503633647194007, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28548576779026225, "calib/step_q_c_n": 267.0, "calib/step_q_gap": -0.04320138304772658, "calib/step_q_w": 0.32868715083798883, "calib/step_q_w_n": 1074.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 534.0, "completions/max_terminated_length": 534.0, "completions/mean_length": 193.2421875, "completions/mean_terminated_length": 194.00001525878906, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.21013333333333334, "grad_norm": 0.012657982297241688, "learning_rate": 8.333333333333334e-08, "loss": 0.1053, "num_tokens": 39812434.0, "reward": 1.0021333694458008, "reward_std": 0.002967329230159521, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7777043581008911, "rewards/format_reward_step": 1.0, "step": 197 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8860240560024977, "aux_distill/mean_u": 0.3080030762756176, "aux_distill/n_active_tok": 153.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5506582411795682, "calib/avg_num_step_conf": 4.84765625, "calib/ece": 0.166953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011521853607161688, "calib/mean_conf": 0.008828125, "calib/mu_c": 0.00977777777777778, "calib/mu_w": 0.00862559241706161, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004940441173050742, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3725735, "calib/step_q_c_n": 200.0, "calib/step_q_gap": 0.05898080067243039, "calib/step_q_w": 0.3135926993275696, "calib/step_q_w_n": 1041.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 477.0, "completions/max_terminated_length": 477.0, "completions/mean_length": 184.109375, "completions/mean_terminated_length": 184.83139038085938, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.2112, "grad_norm": 0.01102263294160366, "learning_rate": 5.555555555555556e-08, "loss": 0.0579, "num_tokens": 39988758.0, "reward": 1.0016674995422363, "reward_std": 0.002053692238405347, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8275538682937622, "rewards/format_reward_step": 1.0, "step": 198 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.8891988657414913, "aux_distill/mean_u": 0.32847913300924675, "aux_distill/n_active_tok": 159.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5358375234151632, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.1828515625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000777876367938482, "calib/mean_conf": 0.0085546875, "calib/mu_c": 0.009183673469387756, "calib/mu_w": 0.008405797101449274, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00564760540205703, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.263408071748879, "calib/step_q_c_n": 223.0, "calib/step_q_gap": -0.06313928674168712, "calib/step_q_w": 0.3265473584905661, "calib/step_q_w_n": 1060.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 487.0, "completions/max_terminated_length": 487.0, "completions/mean_length": 189.8828125, "completions/mean_terminated_length": 190.62745666503906, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.21226666666666666, "grad_norm": 0.010852034203708172, "learning_rate": 2.777777777777778e-08, "loss": 0.1011, "num_tokens": 40165376.0, "reward": 1.001705288887024, "reward_std": 0.002582351677119732, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8120043277740479, "rewards/format_reward_step": 1.0, "step": 199 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.866759080439806, "aux_distill/mean_u": 0.31014469211236895, "aux_distill/n_active_tok": 161.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5681837735524391, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.2224609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014875677535920182, "calib/mean_conf": 0.0080078125, "calib/mu_c": 0.009152542372881357, "calib/mu_w": 0.007664974619289339, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.005032699470944371, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3124904214559387, "calib/step_q_c_n": 261.0, "calib/step_q_gap": 0.0130416787093236, "calib/step_q_w": 0.2994487427466151, "calib/step_q_w_n": 1034.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 567.0, "completions/max_terminated_length": 567.0, "completions/mean_length": 201.74609375, "completions/mean_terminated_length": 202.53726196289062, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.21333333333333335, "grad_norm": 0.008249734528362751, "learning_rate": 0.0, "loss": 0.1024, "num_tokens": 40348879.0, "reward": 1.0020647048950195, "reward_std": 0.0021201735362410545, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7736605405807495, "rewards/format_reward_step": 1.0, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.10086522626224906, "train_runtime": 13306.6332, "train_samples_per_second": 3.848, "train_steps_per_second": 0.015 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 40348879, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }