{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.4131654458386558, "aux_distill/mean_u": 0.31677682190706, "aux_distill/n_active_tok": 24.571428571428573, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.8921052631578947, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.19807541241162607, "calib/step_q_w": 0.6940298507462687, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.04508271813392639, "learning_rate": 2.5000000000000004e-07, "loss": 0.2169, "num_tokens": 264685.0, "reward": 0.037574999034404755, "reward_std": 0.07449960708618164, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.1092121005058289, "aux_distill/mean_u": 0.2935626227740425, "aux_distill/n_active_tok": 28.63157894736842, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.781, "calib/step_q_c_n": 20.0, "calib/step_q_gap": -0.042553719008264435, "calib/step_q_w": 0.8235537190082645, "calib/step_q_w_n": 121.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.03766733407974243, "learning_rate": 5.000000000000001e-07, "loss": 0.263, "num_tokens": 533467.0, "reward": 0.07537207007408142, "reward_std": 0.14035090804100037, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.375932554403941, "aux_distill/mean_u": 0.24507726546495304, "aux_distill/n_active_tok": 23.0, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.24242424242424243, "calib/avg_num_step_conf": 0.26953125, "calib/ece": 0.7335714285714285, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8571428571428571, "calib/gap": -0.05242424242424237, "calib/mean_conf": 0.947857142857143, "calib/mu_c": 0.9066666666666666, "calib/mu_w": 0.959090909090909, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.7335714285714285, "calib/std_conf": 0.056083938549867415, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.6900000000000001, "calib/step_q_c_n": 17.0, "calib/step_q_gap": -0.17057692307692285, "calib/step_q_w": 0.8605769230769229, "calib/step_q_w_n": 52.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 693.890625, "completions/mean_terminated_length": 746.3698120117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.03947603330016136, "learning_rate": 7.5e-07, "loss": 0.2027, "num_tokens": 816359.0, "reward": 0.03406660258769989, "reward_std": 0.07613541185855865, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.013445701450109482, "rewards/format_reward_step": 0.04296875, "step": 3 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.2811786349003131, "aux_distill/mean_u": 0.3183269252095441, "aux_distill/n_active_tok": 22.153846153846153, "calib/answer_extract_rate": 0.06640625, "calib/avg_num_step_conf": 0.28125, "calib/ece": 0.7961538461538462, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.6923076923076923, "calib/mean_conf": 0.7961538461538461, "calib/mu_c": NaN, "calib/mu_w": 0.7961538461538461, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7961538461538462, "calib/std_conf": 0.322121297305091, "calib/step_conf_rate": 0.05859375, "calib/step_q_w": 0.7857407407407409, "calib/step_q_w_n": 72.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2930.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 648.04296875, "completions/mean_terminated_length": 699.9957275390625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.03017072007060051, "learning_rate": 1.0000000000000002e-06, "loss": 0.1711, "num_tokens": 1088426.0, "reward": 0.019221873953938484, "reward_std": 0.0504031628370285, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.007193749770522118, "rewards/format_reward_step": 0.03125, "step": 4 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.1949924447319724, "aux_distill/mean_u": 0.36153202551690744, "aux_distill/n_active_tok": 33.45454545454545, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.375, "calib/avg_num_step_conf": 0.359375, "calib/ece": 0.6869230769230771, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": -0.1050000000000001, "calib/mean_conf": 0.9176923076923077, "calib/mu_c": 0.845, "calib/mu_w": 0.9500000000000001, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.6484615384615386, "calib/std_conf": 0.12490943464728885, "calib/step_conf_rate": 0.0625, "calib/step_q_c": 0.7553846153846155, "calib/step_q_c_n": 13.0, "calib/step_q_gap": -0.02170525803310608, "calib/step_q_w": 0.7770898734177216, "calib/step_q_w_n": 79.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3058.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 739.875, "completions/mean_terminated_length": 827.1091918945312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.018968043848872185, "learning_rate": 1.25e-06, "loss": 0.173, "num_tokens": 1384522.0, "reward": 0.034125782549381256, "reward_std": 0.07533922791481018, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.013564062304794788, "rewards/format_reward_step": 0.0390625, "step": 5 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.3305943420058803, "aux_distill/mean_u": 0.3693736034622527, "aux_distill/n_active_tok": 24.63157894736842, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.2777777777777778, "calib/avg_num_step_conf": 0.48046875, "calib/ece": 0.8085, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.6, "calib/gap": -0.07611111111111091, "calib/mean_conf": 0.9085000000000001, "calib/mu_c": 0.8400000000000001, "calib/mu_w": 0.916111111111111, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.8085, "calib/std_conf": 0.09078959191449205, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.858125, "calib/step_q_c_n": 16.0, "calib/step_q_gap": 0.034386682242990574, "calib/step_q_w": 0.8237383177570095, "calib/step_q_w_n": 107.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 626.8515625, "completions/mean_terminated_length": 697.7130126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0064, "grad_norm": 0.04654983431100845, "learning_rate": 1.5e-06, "loss": 0.2868, "num_tokens": 1650948.0, "reward": 0.047386325895786285, "reward_std": 0.09225471317768097, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01664765551686287, "rewards/format_reward_step": 0.06640625, "step": 6 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.3733870275318623, "aux_distill/mean_u": 0.36923659447005963, "aux_distill/n_active_tok": 27.5, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.32307692307692304, "calib/avg_num_step_conf": 0.43359375, "calib/ece": 0.6955555555555555, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.0546875, "calib/frac_conf_gt_0.9": 0.8888888888888888, "calib/gap": -0.020615384615384924, "calib/mean_conf": 0.958888888888889, "calib/mu_c": 0.944, "calib/mu_w": 0.9646153846153849, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.6883333333333332, "calib/std_conf": 0.029979416807182312, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.7900111111111112, "calib/step_q_c_n": 18.0, "calib/step_q_gap": 0.07941218637992853, "calib/step_q_w": 0.7105989247311827, "calib/step_q_w_n": 93.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2710.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 739.20703125, "completions/mean_terminated_length": 791.78662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.007466666666666667, "grad_norm": 0.04697367548942566, "learning_rate": 1.75e-06, "loss": 0.2593, "num_tokens": 1947609.0, "reward": 0.04775039106607437, "reward_std": 0.11989802122116089, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.021282030269503593, "rewards/format_reward_step": 0.0546875, "step": 7 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.3266922648136432, "aux_distill/mean_u": 0.37519659554870155, "aux_distill/n_active_tok": 30.153846153846153, "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.4666666666666667, "calib/avg_num_step_conf": 0.3828125, "calib/ece": 0.5494117647058824, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7058823529411765, "calib/gap": 0.04599999999999993, "calib/mean_conf": 0.8435294117647059, "calib/mu_c": 0.876, "calib/mu_w": 0.8300000000000001, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.5494117647058824, "calib/std_conf": 0.24369559547229233, "calib/step_conf_rate": 0.07421875, "calib/step_q_c": 0.6555000000000001, "calib/step_q_c_n": 20.0, "calib/step_q_gap": -0.0959102564102563, "calib/step_q_w": 0.7514102564102564, "calib/step_q_w_n": 78.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 654.44921875, "completions/mean_terminated_length": 709.9110107421875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.008533333333333334, "grad_norm": 0.029148366302251816, "learning_rate": 2.0000000000000003e-06, "loss": 0.1835, "num_tokens": 2221660.0, "reward": 0.04404374584555626, "reward_std": 0.09272695332765579, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.025587501004338264, "rewards/format_reward_step": 0.04296875, "step": 8 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.3627358720852778, "aux_distill/mean_u": 0.32543559097750124, "aux_distill/n_active_tok": 16.615384615384617, "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.47, "calib/avg_num_step_conf": 0.2109375, "calib/ece": 0.5559999999999999, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.7333333333333333, "calib/gap": 0.030999999999999917, "calib/mean_conf": 0.8693333333333333, "calib/mu_c": 0.8899999999999999, "calib/mu_w": 0.859, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.5459999999999999, "calib/std_conf": 0.2073794161005914, "calib/step_conf_rate": 0.05078125, "calib/step_q_c": 0.782, "calib/step_q_c_n": 15.0, "calib/step_q_gap": -0.029282051282051302, "calib/step_q_w": 0.8112820512820513, "calib/step_q_w_n": 39.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 617.34765625, "completions/mean_terminated_length": 669.665283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.038193874061107635, "learning_rate": 2.25e-06, "loss": 0.1983, "num_tokens": 2487237.0, "reward": 0.03526093810796738, "reward_std": 0.06660518795251846, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.019740624353289604, "rewards/format_reward_step": 0.03125, "step": 9 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.188918528648523, "aux_distill/mean_u": 0.2326953426965051, "aux_distill/n_active_tok": 17.23076923076923, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.7083333333333334, "calib/avg_num_step_conf": 0.21875, "calib/ece": 0.5243571428571429, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.42857142857142855, "calib/gap": 0.28241666666666665, "calib/mean_conf": 0.6329285714285715, "calib/mu_c": 0.875, "calib/mu_w": 0.5925833333333334, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.5072142857142857, "calib/std_conf": 0.371696970333499, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.8833333333333333, "calib/step_q_c_n": 6.0, "calib/step_q_gap": 0.35929333333333335, "calib/step_q_w": 0.52404, "calib/step_q_w_n": 50.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2989.0, "completions/max_terminated_length": 2989.0, "completions/mean_length": 588.48828125, "completions/mean_terminated_length": 649.3663940429688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.0316932275891304, "learning_rate": 2.5e-06, "loss": 0.1596, "num_tokens": 2744690.0, "reward": 0.03827636316418648, "reward_std": 0.09364674240350723, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.025771480053663254, "rewards/format_reward_step": 0.04296875, "step": 10 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.1973961561918258, "aux_distill/mean_u": 0.2825744844156909, "aux_distill/n_active_tok": 25.2, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.41666666666666663, "calib/avg_num_step_conf": 0.5, "calib/ece": 0.6805263157894734, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.07421875, "calib/frac_conf_gt_0.9": 0.8421052631578947, "calib/gap": -0.11243589743589744, "calib/mean_conf": 0.9152631578947369, "calib/mu_c": 0.8383333333333334, "calib/mu_w": 0.9507692307692308, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.15234375, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.6399999999999998, "calib/std_conf": 0.1659222525938494, "calib/step_conf_rate": 0.12109375, "calib/step_q_c": 0.7260714285714286, "calib/step_q_c_n": 28.0, "calib/step_q_gap": -0.05922857142857141, "calib/step_q_w": 0.7853, "calib/step_q_w_n": 100.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.10546875, "completions/max_length": 3055.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 662.40625, "completions/mean_terminated_length": 740.506591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.04365599527955055, "learning_rate": 2.7500000000000004e-06, "loss": 0.2666, "num_tokens": 3018746.0, "reward": 0.06175879016518593, "reward_std": 0.12854436039924622, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.025861326605081558, "rewards/format_reward_step": 0.07421875, "step": 11 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.3279105214511646, "aux_distill/mean_u": 0.4500246926560413, "aux_distill/n_active_tok": 22.823529411764707, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.4166666666666667, "calib/avg_num_step_conf": 0.37890625, "calib/ece": 0.6394117647058823, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8823529411764706, "calib/gap": 0.04166666666666674, "calib/mean_conf": 0.9205882352941178, "calib/mu_c": 0.95, "calib/mu_w": 0.9083333333333332, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.078125, "calib/pce": 0.6329411764705882, "calib/std_conf": 0.1596124198409198, "calib/step_conf_rate": 0.078125, "calib/step_q_c": 0.6076666666666667, "calib/step_q_c_n": 30.0, "calib/step_q_gap": -0.1189004975124377, "calib/step_q_w": 0.7265671641791044, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.109375, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 628.9296875, "completions/mean_terminated_length": 706.1666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.05113685876131058, "learning_rate": 3e-06, "loss": 0.2352, "num_tokens": 3283928.0, "reward": 0.0459529273211956, "reward_std": 0.1216077208518982, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.021593358367681503, "rewards/format_reward_step": 0.05078125, "step": 12 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.1530120442895329, "aux_distill/mean_u": 0.33127441420315484, "aux_distill/n_active_tok": 32.0, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.5390625, "calib/avg_num_step_conf": 0.546875, "calib/ece": 0.5975, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.55, "calib/gap": 0.07812500000000011, "calib/mean_conf": 0.7474999999999999, "calib/mu_c": 0.81, "calib/mu_w": 0.7318749999999999, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.5725, "calib/std_conf": 0.3290117779046823, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.7300000000000001, "calib/step_q_c_n": 15.0, "calib/step_q_gap": 0.13221559139784955, "calib/step_q_w": 0.5977844086021505, "calib/step_q_w_n": 124.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2938.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 637.359375, "completions/mean_terminated_length": 697.2821044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.04118745028972626, "learning_rate": 3.2500000000000002e-06, "loss": 0.2057, "num_tokens": 3551684.0, "reward": 0.05240878835320473, "reward_std": 0.11728723347187042, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.026692576706409454, "rewards/format_reward_step": 0.0625, "step": 13 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.1231984041986012, "aux_distill/mean_u": 0.275275627062822, "aux_distill/n_active_tok": 25.904761904761905, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.3583333333333333, "calib/avg_num_step_conf": 0.53125, "calib/ece": 0.7061434782608695, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.4782608695652174, "calib/gap": -0.11906499999999987, "calib/mean_conf": 0.7635347826086957, "calib/mu_c": 0.66, "calib/mu_w": 0.7790649999999999, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.16015625, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.6696217391304349, "calib/std_conf": 0.30982760368882795, "calib/step_conf_rate": 0.12109375, "calib/step_q_c": 0.44999999999999996, "calib/step_q_c_n": 13.0, "calib/step_q_gap": -0.16527398373983748, "calib/step_q_w": 0.6152739837398374, "calib/step_q_w_n": 123.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 721.92578125, "completions/mean_terminated_length": 789.7991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.014933333333333333, "grad_norm": 0.0329865962266922, "learning_rate": 3.5e-06, "loss": 0.2534, "num_tokens": 3841897.0, "reward": 0.052902527153491974, "reward_std": 0.11711085587739944, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.027680054306983948, "rewards/format_reward_step": 0.06640625, "step": 14 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.1267299324274063, "aux_distill/mean_u": 0.4025237962874277, "aux_distill/n_active_tok": 27.0, "calib/answer_extract_rate": 0.11328125, "calib/auroc": 0.19318181818181815, "calib/avg_num_step_conf": 0.52734375, "calib/ece": 0.7266666666666668, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.5416666666666666, "calib/gap": -0.10636363636363633, "calib/mean_conf": 0.7825000000000001, "calib/mu_c": 0.685, "calib/mu_w": 0.7913636363636364, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.11328125, "calib/pce": 0.7129166666666668, "calib/std_conf": 0.3053993287484437, "calib/step_conf_rate": 0.11328125, "calib/step_q_c": 0.4879999999999999, "calib/step_q_c_n": 10.0, "calib/step_q_gap": -0.186724, "calib/step_q_w": 0.6747239999999999, "calib/step_q_w_n": 125.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 644.44921875, "completions/mean_terminated_length": 696.1138916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.026582898572087288, "learning_rate": 3.7500000000000005e-06, "loss": 0.2583, "num_tokens": 4114756.0, "reward": 0.061240628361701965, "reward_std": 0.12151821702718735, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.028731251135468483, "rewards/format_reward_step": 0.0859375, "step": 15 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.135950155556202, "aux_distill/mean_u": 0.3597005385226347, "aux_distill/n_active_tok": 25.5, "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.3818181818181818, "calib/avg_num_step_conf": 0.3984375, "calib/ece": 0.6481875, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.375, "calib/gap": -0.21601818181818166, "calib/mean_conf": 0.6943125, "calib/mu_c": 0.5458000000000001, "calib/mu_w": 0.7618181818181817, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.515, "calib/std_conf": 0.29892133554457095, "calib/step_conf_rate": 0.08203125, "calib/step_q_c": 0.5407692307692308, "calib/step_q_c_n": 26.0, "calib/step_q_gap": -0.19265182186234808, "calib/step_q_w": 0.7334210526315789, "calib/step_q_w_n": 76.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 733.09765625, "completions/mean_terminated_length": 798.6084594726562, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.02180486172437668, "learning_rate": 4.000000000000001e-06, "loss": 0.209, "num_tokens": 4411277.0, "reward": 0.0470406636595726, "reward_std": 0.11150971055030823, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.023768823593854904, "rewards/format_reward_step": 0.05078125, "step": 16 }, { "aux_distill/lambda": 0.2999999999999999, "aux_distill/loss": 1.048417510986328, "aux_distill/mean_u": 0.352884973460103, "aux_distill/n_active_tok": 34.08, "calib/answer_extract_rate": 0.1640625, "calib/auroc": 0.3012422360248447, "calib/avg_num_step_conf": 0.84765625, "calib/ece": 0.641, "calib/final_conf_rate": 0.1171875, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.5333333333333333, "calib/gap": -0.22850931677018627, "calib/mean_conf": 0.7123333333333333, "calib/mu_c": 0.5371428571428571, "calib/mu_w": 0.7656521739130434, "calib/nonempty_final_conf_rate": 0.1171875, "calib/nonempty_reasoning_rate": 0.234375, "calib/nonempty_step_conf_rate": 0.1640625, "calib/pce": 0.5599999999999999, "calib/std_conf": 0.31450154565951216, "calib/step_conf_rate": 0.1640625, "calib/step_q_c": 0.5005045454545455, "calib/step_q_c_n": 22.0, "calib/step_q_gap": -0.09365750582750587, "calib/step_q_w": 0.5941620512820514, "calib/step_q_w_n": 195.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2991.0, "completions/max_terminated_length": 2991.0, "completions/mean_length": 616.61328125, "completions/mean_terminated_length": 680.40087890625, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.018133333333333335, "grad_norm": 0.022275879979133606, "learning_rate": 4.25e-06, "loss": 0.2932, "num_tokens": 4672658.0, "reward": 0.07711464911699295, "reward_std": 0.1641203761100769, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.0409480482339859, "rewards/format_reward_step": 0.0859375, "step": 17 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.0378237331614775, "aux_distill/mean_u": 0.2850642462215336, "aux_distill/n_active_tok": 17.176470588235293, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.18181818181818177, "calib/avg_num_step_conf": 0.28515625, "calib/ece": 0.6483374879275133, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/gap": -0.37091362319365084, "calib/mean_conf": 0.61000415459418, "calib/mu_c": 0.27, "calib/mu_w": 0.6409136231936509, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.09375, "calib/nonempty_step_conf_rate": 0.07421875, "calib/pce": 0.58750415459418, "calib/std_conf": 0.3330381710490205, "calib/step_conf_rate": 0.07421875, "calib/step_q_c": 0.446, "calib/step_q_c_n": 5.0, "calib/step_q_gap": -0.10885294117647043, "calib/step_q_w": 0.5548529411764704, "calib/step_q_w_n": 68.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 631.98828125, "completions/mean_terminated_length": 728.779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.03261338174343109, "learning_rate": 4.5e-06, "loss": 0.1528, "num_tokens": 4945167.0, "reward": 0.030748046934604645, "reward_std": 0.07678812742233276, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.01852734386920929, "rewards/format_reward_step": 0.0390625, "step": 18 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.0097080160070349, "aux_distill/mean_u": 0.3194098281602694, "aux_distill/n_active_tok": 41.333333333333336, "calib/answer_extract_rate": 0.16015625, "calib/auroc": 0.5208333333333333, "calib/avg_num_step_conf": 1.08984375, "calib/ece": 0.47091428571428573, "calib/final_conf_rate": 0.13671875, "calib/format_rate": 0.09765625, "calib/frac_conf_gt_0.9": 0.22857142857142856, "calib/gap": 0.07566666666666666, "calib/mean_conf": 0.5274857142857142, "calib/mu_c": 0.5966666666666667, "calib/mu_w": 0.521, "calib/nonempty_final_conf_rate": 0.13671875, "calib/nonempty_reasoning_rate": 0.234375, "calib/nonempty_step_conf_rate": 0.19921875, "calib/pce": 0.45634285714285716, "calib/std_conf": 0.3273590751478148, "calib/step_conf_rate": 0.19921875, "calib/step_q_c": 0.7294736842105264, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.3565667611336033, "calib/step_q_w": 0.3729069230769231, "calib/step_q_w_n": 260.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2837.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 608.08203125, "completions/mean_terminated_length": 640.6131591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.020266666666666665, "grad_norm": 0.02417433261871338, "learning_rate": 4.75e-06, "loss": 0.3277, "num_tokens": 5205596.0, "reward": 0.08809477090835571, "reward_std": 0.19941377639770508, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.06290827691555023, "rewards/format_reward_step": 0.09765625, "step": 19 }, { "aux_distill/lambda": 0.29999999999999993, "aux_distill/loss": 1.0175798769508089, "aux_distill/mean_u": 0.345296856355202, "aux_distill/n_active_tok": 46.857142857142854, "calib/answer_extract_rate": 0.2109375, "calib/auroc": 0.3888888888888889, "calib/avg_num_step_conf": 1.28515625, "calib/ece": 0.38424444444444444, "calib/final_conf_rate": 0.17578125, "calib/format_rate": 0.1328125, "calib/frac_conf_gt_0.9": 0.1111111111111111, "calib/gap": -0.08728205128205124, "calib/mean_conf": 0.447311111111111, "calib/mu_c": 0.37166666666666665, "calib/mu_w": 0.4589487179487179, "calib/nonempty_final_conf_rate": 0.17578125, "calib/nonempty_reasoning_rate": 0.328125, "calib/nonempty_step_conf_rate": 0.26953125, "calib/pce": 0.3491111111111111, "calib/std_conf": 0.3052094233452916, "calib/step_conf_rate": 0.26953125, "calib/step_q_c": 0.47380952380952385, "calib/step_q_c_n": 21.0, "calib/step_q_gap": 0.19352738095238098, "calib/step_q_w": 0.28028214285714287, "calib/step_q_w_n": 308.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 582.6796875, "completions/mean_terminated_length": 621.5250244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.021333333333333333, "grad_norm": 0.021719202399253845, "learning_rate": 5e-06, "loss": 0.3435, "num_tokens": 5459634.0, "reward": 0.12606582045555115, "reward_std": 0.24382349848747253, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.0958816409111023, "rewards/format_reward_step": 0.1328125, "step": 20 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9779601020197715, "aux_distill/mean_u": 0.35301126158003554, "aux_distill/n_active_tok": 59.74193548387097, "calib/answer_extract_rate": 0.2421875, "calib/auroc": 0.4804347826086957, "calib/avg_num_step_conf": 1.93359375, "calib/ece": 0.39197978315128956, "calib/final_conf_rate": 0.19921875, "calib/format_rate": 0.16796875, "calib/frac_conf_gt_0.9": 0.1568627450980392, "calib/gap": 0.011890629145995035, "calib/mean_conf": 0.39727511880949473, "calib/mu_c": 0.40800000000000003, "calib/mu_w": 0.396109370854005, "calib/nonempty_final_conf_rate": 0.19921875, "calib/nonempty_reasoning_rate": 0.38671875, "calib/nonempty_step_conf_rate": 0.328125, "calib/pce": 0.3456078431372549, "calib/std_conf": 0.3456198548759202, "calib/step_conf_rate": 0.328125, "calib/step_q_c": 0.23555555555555557, "calib/step_q_c_n": 18.0, "calib/step_q_gap": -0.0360178732428964, "calib/step_q_w": 0.271573428798452, "calib/step_q_w_n": 477.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 621.890625, "completions/mean_terminated_length": 689.19482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0224, "grad_norm": 0.029949838295578957, "learning_rate": 4.9722222222222224e-06, "loss": 0.4186, "num_tokens": 5721798.0, "reward": 0.15618005394935608, "reward_std": 0.32561713457107544, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.12486011534929276, "rewards/format_reward_step": 0.16796875, "step": 21 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 1.0142595786601305, "aux_distill/mean_u": 0.38725862041017606, "aux_distill/n_active_tok": 70.125, "calib/answer_extract_rate": 0.31640625, "calib/auroc": 0.6407407407407408, "calib/avg_num_step_conf": 2.203125, "calib/ece": 0.27249275362318837, "calib/final_conf_rate": 0.26953125, "calib/format_rate": 0.203125, "calib/frac_conf_gt_0.9": 0.15942028985507245, "calib/gap": 0.18299999999999994, "calib/mean_conf": 0.4194492753623189, "calib/mu_c": 0.5626666666666666, "calib/mu_w": 0.3796666666666667, "calib/nonempty_final_conf_rate": 0.26953125, "calib/nonempty_reasoning_rate": 0.49609375, "calib/nonempty_step_conf_rate": 0.41796875, "calib/pce": 0.23727536231884055, "calib/std_conf": 0.34296911325132823, "calib/step_conf_rate": 0.41796875, "calib/step_q_c": 0.21596969696969698, "calib/step_q_c_n": 66.0, "calib/step_q_gap": -0.03991813435560426, "calib/step_q_w": 0.25588783132530124, "calib/step_q_w_n": 498.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2882.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 561.83984375, "completions/mean_terminated_length": 594.3429565429688, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.023466666666666667, "grad_norm": 0.02805226296186447, "learning_rate": 4.944444444444445e-06, "loss": 0.3614, "num_tokens": 5967445.0, "reward": 0.20527201890945435, "reward_std": 0.34744924306869507, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.14882531762123108, "rewards/format_reward_step": 0.203125, "step": 22 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9831480960692128, "aux_distill/mean_u": 0.31227868519153984, "aux_distill/n_active_tok": 66.58064516129032, "calib/answer_extract_rate": 0.33984375, "calib/auroc": 0.43243243243243246, "calib/avg_num_step_conf": 2.0234375, "calib/ece": 0.2986448154098389, "calib/final_conf_rate": 0.296875, "calib/format_rate": 0.23828125, "calib/frac_conf_gt_0.9": 0.06578947368421052, "calib/gap": -0.13671629690740222, "calib/mean_conf": 0.3181184996203653, "calib/mu_c": 0.185, "calib/mu_w": 0.3217162969074022, "calib/nonempty_final_conf_rate": 0.296875, "calib/nonempty_reasoning_rate": 0.48828125, "calib/nonempty_step_conf_rate": 0.41015625, "calib/pce": 0.29522376277826, "calib/std_conf": 0.2936267548297175, "calib/step_conf_rate": 0.41015625, "calib/step_q_c": 0.35166666666666674, "calib/step_q_c_n": 6.0, "calib/step_q_gap": 0.07229459464389526, "calib/step_q_w": 0.2793720720227715, "calib/step_q_w_n": 512.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2969.0, "completions/max_terminated_length": 2969.0, "completions/mean_length": 624.36328125, "completions/mean_terminated_length": 663.22412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.024533333333333334, "grad_norm": 0.01959611475467682, "learning_rate": 4.9166666666666665e-06, "loss": 0.3293, "num_tokens": 6231218.0, "reward": 0.2157849371433258, "reward_std": 0.3200821280479431, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.18547609448432922, "rewards/format_reward_step": 0.23828125, "step": 23 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 1.0078411493450403, "aux_distill/mean_u": 0.37552853406236947, "aux_distill/n_active_tok": 83.25, "calib/answer_extract_rate": 0.41015625, "calib/auroc": 0.3296511627906976, "calib/avg_num_step_conf": 2.6015625, "calib/ece": 0.3622120249895833, "calib/final_conf_rate": 0.375, "calib/format_rate": 0.30859375, "calib/frac_conf_gt_0.9": 0.10416666666666667, "calib/gap": -0.12713760929069765, "calib/mean_conf": 0.36041410832291665, "calib/mu_c": 0.24652, "calib/mu_w": 0.37365760929069763, "calib/nonempty_final_conf_rate": 0.375, "calib/nonempty_reasoning_rate": 0.59375, "calib/nonempty_step_conf_rate": 0.51953125, "calib/pce": 0.30922973332291664, "calib/std_conf": 0.31589091395569247, "calib/step_conf_rate": 0.51953125, "calib/step_q_c": 0.22427352941176468, "calib/step_q_c_n": 34.0, "calib/step_q_gap": -0.11105084149312358, "calib/step_q_w": 0.33532437090488826, "calib/step_q_w_n": 632.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 3060.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 565.09765625, "completions/mean_terminated_length": 588.069091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0256, "grad_norm": 0.02145981602370739, "learning_rate": 4.888888888888889e-06, "loss": 0.4741, "num_tokens": 6480395.0, "reward": 0.2843121290206909, "reward_std": 0.3424231708049774, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.21706172823905945, "rewards/format_reward_step": 0.30859375, "step": 24 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9836863875389099, "aux_distill/mean_u": 0.3532546896091583, "aux_distill/n_active_tok": 106.0, "calib/answer_extract_rate": 0.4453125, "calib/auroc": 0.4957142857142857, "calib/avg_num_step_conf": 3.3125, "calib/ece": 0.3087335762024953, "calib/final_conf_rate": 0.41796875, "calib/format_rate": 0.328125, "calib/frac_conf_gt_0.9": 0.11214953271028037, "calib/gap": -0.003102926536669992, "calib/mean_conf": 0.3528999313426823, "calib/mu_c": 0.35, "calib/mu_w": 0.35310292653666997, "calib/nonempty_final_conf_rate": 0.41796875, "calib/nonempty_reasoning_rate": 0.6796875, "calib/nonempty_step_conf_rate": 0.59765625, "calib/pce": 0.298106473398757, "calib/std_conf": 0.3057849058569881, "calib/step_conf_rate": 0.59765625, "calib/step_q_c": 0.13434782608695653, "calib/step_q_c_n": 23.0, "calib/step_q_gap": -0.15756816371922702, "calib/step_q_w": 0.29191598980618355, "calib/step_q_w_n": 825.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 3061.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 598.171875, "completions/mean_terminated_length": 622.48779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.02666666666666667, "grad_norm": 0.02117428369820118, "learning_rate": 4.861111111111111e-06, "loss": 0.4794, "num_tokens": 6736751.0, "reward": 0.302656352519989, "reward_std": 0.3933459520339966, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.2498440146446228, "rewards/format_reward_step": 0.328125, "step": 25 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9649364463984966, "aux_distill/mean_u": 0.43439169292320684, "aux_distill/n_active_tok": 122.25, "calib/answer_extract_rate": 0.5, "calib/auroc": 0.5594135802469136, "calib/avg_num_step_conf": 3.83984375, "calib/ece": 0.32071225440319434, "calib/final_conf_rate": 0.4453125, "calib/format_rate": 0.37109375, "calib/frac_conf_gt_0.9": 0.07017543859649122, "calib/gap": 0.01434076850033189, "calib/mean_conf": 0.36474734212249255, "calib/mu_c": 0.37833333333333335, "calib/mu_w": 0.36399256483300146, "calib/nonempty_final_conf_rate": 0.4453125, "calib/nonempty_reasoning_rate": 0.734375, "calib/nonempty_step_conf_rate": 0.66015625, "calib/pce": 0.3164140087891592, "calib/std_conf": 0.28864045101743246, "calib/step_conf_rate": 0.66015625, "calib/step_q_c": 0.18026315789473682, "calib/step_q_c_n": 38.0, "calib/step_q_gap": -0.07871680475979861, "calib/step_q_w": 0.25897996265453543, "calib/step_q_w_n": 945.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 603.3359375, "completions/mean_terminated_length": 627.8617553710938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.027733333333333332, "grad_norm": 0.021297749131917953, "learning_rate": 4.833333333333333e-06, "loss": 0.4328, "num_tokens": 6996445.0, "reward": 0.3466510474681854, "reward_std": 0.435642272233963, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.29877087473869324, "rewards/format_reward_step": 0.37109375, "step": 26 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9238360952585936, "aux_distill/mean_u": 0.3704680656924057, "aux_distill/n_active_tok": 123.75, "calib/answer_extract_rate": 0.6015625, "calib/auroc": 0.43408613445378147, "calib/avg_num_step_conf": 3.953125, "calib/ece": 0.30361644310026653, "calib/final_conf_rate": 0.5859375, "calib/format_rate": 0.48828125, "calib/frac_conf_gt_0.9": 0.08, "calib/gap": -0.0879839650679834, "calib/mean_conf": 0.3743435568997334, "calib/mu_c": 0.2945714285714286, "calib/mu_w": 0.382555393639412, "calib/nonempty_final_conf_rate": 0.5859375, "calib/nonempty_reasoning_rate": 0.7890625, "calib/nonempty_step_conf_rate": 0.703125, "calib/pce": 0.2923133333333333, "calib/std_conf": 0.29847912081859895, "calib/step_conf_rate": 0.703125, "calib/step_q_c": 0.32738, "calib/step_q_c_n": 55.0, "calib/step_q_gap": 0.03473288873430963, "calib/step_q_w": 0.2926471112656904, "calib/step_q_w_n": 956.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2700.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 504.91015625, "completions/mean_terminated_length": 521.1975708007812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0288, "grad_norm": 0.019252900034189224, "learning_rate": 4.805555555555556e-06, "loss": 0.4474, "num_tokens": 7230918.0, "reward": 0.4484649896621704, "reward_std": 0.42001235485076904, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.353961318731308, "rewards/format_reward_step": 0.48828125, "step": 27 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.912374921143055, "aux_distill/mean_u": 0.35857990176340526, "aux_distill/n_active_tok": 117.625, "calib/answer_extract_rate": 0.5625, "calib/auroc": 0.44670280036133697, "calib/avg_num_step_conf": 3.8359375, "calib/ece": 0.27327455407936924, "calib/final_conf_rate": 0.55078125, "calib/format_rate": 0.45703125, "calib/frac_conf_gt_0.9": 0.05673758865248227, "calib/gap": -0.038345627034073626, "calib/mean_conf": 0.34845044060419184, "calib/mu_c": 0.315, "calib/mu_w": 0.35334562703407363, "calib/nonempty_final_conf_rate": 0.55078125, "calib/nonempty_reasoning_rate": 0.76953125, "calib/nonempty_step_conf_rate": 0.69921875, "calib/pce": 0.24703271010773803, "calib/std_conf": 0.27598673100329424, "calib/step_conf_rate": 0.69921875, "calib/step_q_c": 0.4151190476190476, "calib/step_q_c_n": 84.0, "calib/step_q_gap": 0.0771668163846902, "calib/step_q_w": 0.3379522312343574, "calib/step_q_w_n": 898.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 2680.0, "completions/max_terminated_length": 2680.0, "completions/mean_length": 481.4453125, "completions/mean_terminated_length": 509.2974853515625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.029866666666666666, "grad_norm": 0.016628732904791832, "learning_rate": 4.777777777777778e-06, "loss": 0.3969, "num_tokens": 7461112.0, "reward": 0.4341887831687927, "reward_std": 0.4316248893737793, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.34103381633758545, "rewards/format_reward_step": 0.45703125, "step": 28 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9332783650606871, "aux_distill/mean_u": 0.387008633237988, "aux_distill/n_active_tok": 118.875, "calib/answer_extract_rate": 0.6796875, "calib/auroc": 0.3953703703703704, "calib/avg_num_step_conf": 3.74609375, "calib/ece": 0.36014871610465116, "calib/final_conf_rate": 0.671875, "calib/format_rate": 0.55859375, "calib/frac_conf_gt_0.9": 0.06395348837209303, "calib/gap": -0.04073567388888888, "calib/mean_conf": 0.38256732075581396, "calib/mu_c": 0.3442, "calib/mu_w": 0.3849356738888889, "calib/nonempty_final_conf_rate": 0.671875, "calib/nonempty_reasoning_rate": 0.87890625, "calib/nonempty_step_conf_rate": 0.8046875, "calib/pce": 0.3422882509883721, "calib/std_conf": 0.2834787782532363, "calib/step_conf_rate": 0.8046875, "calib/step_q_c": 0.30735294117647055, "calib/step_q_c_n": 34.0, "calib/step_q_gap": -0.08506815316426153, "calib/step_q_w": 0.3924210943407321, "calib/step_q_w_n": 925.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2737.0, "completions/max_terminated_length": 2737.0, "completions/mean_length": 443.3828125, "completions/mean_terminated_length": 463.2897644042969, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.030933333333333334, "grad_norm": 0.0148693285882473, "learning_rate": 4.75e-06, "loss": 0.419, "num_tokens": 7681746.0, "reward": 0.5097656846046448, "reward_std": 0.44440221786499023, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.4218751788139343, "rewards/format_reward_step": 0.55859375, "step": 29 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.935307526960969, "aux_distill/mean_u": 0.33969780650988646, "aux_distill/n_active_tok": 119.125, "calib/answer_extract_rate": 0.75, "calib/auroc": 0.42008196721311475, "calib/avg_num_step_conf": 3.72265625, "calib/ece": 0.3535955130890052, "calib/final_conf_rate": 0.74609375, "calib/format_rate": 0.61328125, "calib/frac_conf_gt_0.9": 0.06282722513089005, "calib/gap": -0.09215877049180332, "calib/mean_conf": 0.39329871727748694, "calib/mu_c": 0.305, "calib/mu_w": 0.3971587704918033, "calib/nonempty_final_conf_rate": 0.74609375, "calib/nonempty_reasoning_rate": 0.88671875, "calib/nonempty_step_conf_rate": 0.78125, "calib/pce": 0.3525047068062827, "calib/std_conf": 0.28442240836145366, "calib/step_conf_rate": 0.78125, "calib/step_q_c": 0.3773913043478261, "calib/step_q_c_n": 23.0, "calib/step_q_gap": 0.003847411028860548, "calib/step_q_w": 0.37354389331896554, "calib/step_q_w_n": 928.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2662.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 392.8515625, "completions/mean_terminated_length": 397.5098876953125, "completions/min_length": 0.0, "completions/min_terminated_length": 1.0, "epoch": 0.032, "grad_norm": 0.017016848549246788, "learning_rate": 4.722222222222222e-06, "loss": 0.4111, "num_tokens": 7889300.0, "reward": 0.5534157752990723, "reward_std": 0.43640461564064026, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.4623003900051117, "rewards/format_reward_step": 0.61328125, "step": 30 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9320898372679949, "aux_distill/mean_u": 0.35277812933664776, "aux_distill/n_active_tok": 129.25, "calib/answer_extract_rate": 0.75, "calib/auroc": 0.5129411764705882, "calib/avg_num_step_conf": 4.04296875, "calib/ece": 0.34517497453549106, "calib/final_conf_rate": 0.75, "calib/format_rate": 0.65234375, "calib/frac_conf_gt_0.9": 0.052083333333333336, "calib/gap": 0.0063673556692964706, "calib/mean_conf": 0.41537289120215776, "calib/mu_c": 0.4211764705882353, "calib/mu_w": 0.41480911491893885, "calib/nonempty_final_conf_rate": 0.75, "calib/nonempty_reasoning_rate": 0.9140625, "calib/nonempty_step_conf_rate": 0.85546875, "calib/pce": 0.33600309953549107, "calib/std_conf": 0.254673754611707, "calib/step_conf_rate": 0.85546875, "calib/step_q_c": 0.4259090909090909, "calib/step_q_c_n": 66.0, "calib/step_q_gap": 0.0247227423921349, "calib/step_q_w": 0.401186348516956, "calib/step_q_w_n": 969.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2856.0, "completions/max_terminated_length": 2856.0, "completions/mean_length": 357.6953125, "completions/mean_terminated_length": 361.936767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.03306666666666667, "grad_norm": 0.015859203413128853, "learning_rate": 4.694444444444445e-06, "loss": 0.3608, "num_tokens": 8086782.0, "reward": 0.6042488217353821, "reward_std": 0.4498489797115326, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.4897475838661194, "rewards/format_reward_step": 0.65234375, "step": 31 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9780595041811466, "aux_distill/mean_u": 0.372310869459932, "aux_distill/n_active_tok": 103.75, "calib/answer_extract_rate": 0.80078125, "calib/auroc": 0.49869109947643986, "calib/avg_num_step_conf": 3.2421875, "calib/ece": 0.369059009842277, "calib/final_conf_rate": 0.81640625, "calib/format_rate": 0.71484375, "calib/frac_conf_gt_0.9": 0.07177033492822966, "calib/gap": 0.004506691382592742, "calib/mean_conf": 0.446437000272899, "calib/mu_c": 0.45055555555555554, "calib/mu_w": 0.4460488641729628, "calib/nonempty_final_conf_rate": 0.81640625, "calib/nonempty_reasoning_rate": 0.9296875, "calib/nonempty_step_conf_rate": 0.86328125, "calib/pce": 0.3646858041006502, "calib/std_conf": 0.26680856897658306, "calib/step_conf_rate": 0.86328125, "calib/step_q_c": 0.5942307692307691, "calib/step_q_c_n": 78.0, "calib/step_q_gap": 0.14223462561374778, "calib/step_q_w": 0.45199614361702134, "calib/step_q_w_n": 752.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2095.0, "completions/max_terminated_length": 2095.0, "completions/mean_length": 329.16796875, "completions/mean_terminated_length": 334.39288330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.034133333333333335, "grad_norm": 0.01736695133149624, "learning_rate": 4.666666666666667e-06, "loss": 0.4322, "num_tokens": 8277753.0, "reward": 0.6483778953552246, "reward_std": 0.3941271901130676, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.5076932907104492, "rewards/format_reward_step": 0.71484375, "step": 32 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9598857369273901, "aux_distill/mean_u": 0.39386526272428385, "aux_distill/n_active_tok": 115.625, "calib/answer_extract_rate": 0.86328125, "calib/auroc": 0.437192118226601, "calib/avg_num_step_conf": 3.6328125, "calib/ece": 0.3795141434249163, "calib/final_conf_rate": 0.83984375, "calib/format_rate": 0.72265625, "calib/frac_conf_gt_0.9": 0.05116279069767442, "calib/gap": -0.08247836208386056, "calib/mean_conf": 0.4240415852853815, "calib/mu_c": 0.3461666666666667, "calib/mu_w": 0.42864502875052723, "calib/nonempty_final_conf_rate": 0.83984375, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.84375, "calib/pce": 0.3738708876109629, "calib/std_conf": 0.261931027986288, "calib/step_conf_rate": 0.84375, "calib/step_q_c": 0.5409999999999999, "calib/step_q_c_n": 49.0, "calib/step_q_gap": 0.11988479001135066, "calib/step_q_w": 0.42111520998864926, "calib/step_q_w_n": 881.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 324.02734375, "completions/mean_terminated_length": 329.170654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 8.0, "epoch": 0.0352, "grad_norm": 0.015602920204401016, "learning_rate": 4.638888888888889e-06, "loss": 0.289, "num_tokens": 8467576.0, "reward": 0.659507155418396, "reward_std": 0.40564966201782227, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.545576810836792, "rewards/format_reward_step": 0.72265625, "step": 33 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9469494726508856, "aux_distill/mean_u": 0.3839535558586833, "aux_distill/n_active_tok": 126.125, "calib/answer_extract_rate": 0.88671875, "calib/auroc": 0.540210287013356, "calib/avg_num_step_conf": 3.94140625, "calib/ece": 0.32582589285714286, "calib/final_conf_rate": 0.875, "calib/format_rate": 0.79296875, "calib/frac_conf_gt_0.9": 0.04017857142857143, "calib/gap": 0.017872975277067327, "calib/mean_conf": 0.40171874999999996, "calib/mu_c": 0.41823529411764704, "calib/mu_w": 0.4003623188405797, "calib/nonempty_final_conf_rate": 0.875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.91015625, "calib/pce": 0.32582589285714286, "calib/std_conf": 0.23804656358400092, "calib/step_conf_rate": 0.91015625, "calib/step_q_c": 0.4535365853658536, "calib/step_q_c_n": 82.0, "calib/step_q_gap": 0.05502737559951215, "calib/step_q_w": 0.39850920976634147, "calib/step_q_w_n": 927.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2176.0, "completions/max_terminated_length": 2176.0, "completions/mean_length": 319.31640625, "completions/mean_terminated_length": 319.31640625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.03626666666666667, "grad_norm": 0.01574413850903511, "learning_rate": 4.611111111111112e-06, "loss": 0.3404, "num_tokens": 8654433.0, "reward": 0.7380313277244568, "reward_std": 0.3996368646621704, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.6166876554489136, "rewards/format_reward_step": 0.79296875, "step": 34 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9516091421246529, "aux_distill/mean_u": 0.4002384927589469, "aux_distill/n_active_tok": 110.625, "calib/answer_extract_rate": 0.91015625, "calib/auroc": 0.6142270861833105, "calib/avg_num_step_conf": 3.45703125, "calib/ece": 0.3480051724137931, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.8046875, "calib/frac_conf_gt_0.9": 0.021551724137931036, "calib/gap": 0.08140136798905606, "calib/mean_conf": 0.4212810344827586, "calib/mu_c": 0.49671764705882354, "calib/mu_w": 0.4153162790697675, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.8984375, "calib/pce": 0.3480051724137931, "calib/std_conf": 0.23485841219390075, "calib/step_conf_rate": 0.8984375, "calib/step_q_c": 0.4683333333333333, "calib/step_q_c_n": 66.0, "calib/step_q_gap": 0.012650671550671566, "calib/step_q_w": 0.45568266178266176, "calib/step_q_w_n": 819.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2100.0, "completions/max_terminated_length": 2100.0, "completions/mean_length": 278.61328125, "completions/mean_terminated_length": 278.61328125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 0.037333333333333336, "grad_norm": 0.013850248418748379, "learning_rate": 4.583333333333333e-06, "loss": 0.3803, "num_tokens": 8835014.0, "reward": 0.7425938844680786, "reward_std": 0.3359072804450989, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.6140941381454468, "rewards/format_reward_step": 0.8046875, "step": 35 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9501235522329807, "aux_distill/mean_u": 0.3815904537690148, "aux_distill/n_active_tok": 126.125, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.5592874867068416, "calib/avg_num_step_conf": 3.94140625, "calib/ece": 0.2880041152263375, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.875, "calib/frac_conf_gt_0.9": 0.03292181069958848, "calib/gap": 0.035317263381779485, "calib/mean_conf": 0.39499999999999996, "calib/mu_c": 0.42653846153846153, "calib/mu_w": 0.39122119815668205, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.2880041152263375, "calib/std_conf": 0.23859346073640167, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.44833448275862076, "calib/step_q_c_n": 116.0, "calib/step_q_gap": 0.05377625207553005, "calib/step_q_w": 0.3945582306830907, "calib/step_q_w_n": 893.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2078.0, "completions/max_terminated_length": 2078.0, "completions/mean_length": 271.53125, "completions/mean_terminated_length": 271.53125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 0.0384, "grad_norm": 0.015552644617855549, "learning_rate": 4.555555555555556e-06, "loss": 0.4061, "num_tokens": 9007238.0, "reward": 0.8283183574676514, "reward_std": 0.3272024989128113, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.6800742149353027, "rewards/format_reward_step": 0.875, "step": 36 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9274401869624853, "aux_distill/mean_u": 0.3502309705297757, "aux_distill/n_active_tok": 117.5, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5682860717264386, "calib/avg_num_step_conf": 3.703125, "calib/ece": 0.29065833333333335, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.03333333333333333, "calib/gap": 0.046988323603002535, "calib/mean_conf": 0.37959166666666666, "calib/mu_c": 0.4222727272727273, "calib/mu_w": 0.37528440366972476, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.28929166666666667, "calib/std_conf": 0.23975560458632778, "calib/step_conf_rate": 0.94140625, "calib/step_q_c": 0.4208333333333333, "calib/step_q_c_n": 84.0, "calib/step_q_gap": 0.023319444444444393, "calib/step_q_w": 0.3975138888888889, "calib/step_q_w_n": 864.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 275.14453125, "completions/mean_terminated_length": 276.2235412597656, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.039466666666666664, "grad_norm": 0.014115996658802032, "learning_rate": 4.527777777777778e-06, "loss": 0.3156, "num_tokens": 9184771.0, "reward": 0.8288605213165283, "reward_std": 0.30853700637817383, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.6928772926330566, "rewards/format_reward_step": 0.87890625, "step": 37 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.959440141916275, "aux_distill/mean_u": 0.41723974982834844, "aux_distill/n_active_tok": 136.25, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.49417747641509435, "calib/avg_num_step_conf": 4.2578125, "calib/ece": 0.25258196721311477, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.90234375, "calib/frac_conf_gt_0.9": 0.01639344262295082, "calib/gap": -0.012299528301886742, "calib/mean_conf": 0.35381147540983604, "calib/mu_c": 0.343125, "calib/mu_w": 0.35542452830188676, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.23762295081967214, "calib/std_conf": 0.21697770649128129, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.4103448275862069, "calib/step_q_c_n": 116.0, "calib/step_q_gap": 0.047790412801812565, "calib/step_q_w": 0.3625544147843943, "calib/step_q_w_n": 974.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2869.0, "completions/max_terminated_length": 2869.0, "completions/mean_length": 294.90625, "completions/mean_terminated_length": 294.90625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.04053333333333333, "grad_norm": 0.012253638356924057, "learning_rate": 4.5e-06, "loss": 0.4246, "num_tokens": 9367155.0, "reward": 0.8671990036964417, "reward_std": 0.27873316407203674, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.7070543169975281, "rewards/format_reward_step": 0.90234375, "step": 38 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9337054584175348, "aux_distill/mean_u": 0.3689994358002082, "aux_distill/n_active_tok": 131.0, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5425077639751553, "calib/avg_num_step_conf": 4.09375, "calib/ece": 0.3108972759682684, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.020242914979757085, "calib/gap": 0.04685220792396477, "calib/mean_conf": 0.3814235917577421, "calib/mu_c": 0.4239130434782608, "calib/mu_w": 0.37706083555429604, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.299601729409564, "calib/std_conf": 0.23669469534543297, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.4155555555555555, "calib/step_q_c_n": 81.0, "calib/step_q_gap": 0.016048833735493484, "calib/step_q_w": 0.399506721820062, "calib/step_q_w_n": 967.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2210.0, "completions/max_terminated_length": 2210.0, "completions/mean_length": 279.98046875, "completions/mean_terminated_length": 279.98046875, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "epoch": 0.0416, "grad_norm": 0.01320998277515173, "learning_rate": 4.472222222222223e-06, "loss": 0.3357, "num_tokens": 9544918.0, "reward": 0.8607459664344788, "reward_std": 0.26971176266670227, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.7175856828689575, "rewards/format_reward_step": 0.9140625, "step": 39 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9231565408408642, "aux_distill/mean_u": 0.37410681811433055, "aux_distill/n_active_tok": 131.0, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.39944819129368486, "calib/avg_num_step_conf": 4.09375, "calib/ece": 0.3130485829959514, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.024291497975708502, "calib/gap": -0.08006805640711212, "calib/mean_conf": 0.35410121457489874, "calib/mu_c": 0.2785714285714286, "calib/mu_w": 0.3586394849785407, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.30523481781376516, "calib/std_conf": 0.2113434420188021, "calib/step_conf_rate": 0.97265625, "calib/step_q_c": 0.296734693877551, "calib/step_q_c_n": 49.0, "calib/step_q_gap": -0.058788429245572094, "calib/step_q_w": 0.3555231231231231, "calib/step_q_w_n": 999.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1816.0, "completions/max_terminated_length": 1816.0, "completions/mean_length": 246.56640625, "completions/mean_terminated_length": 246.56640625, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "epoch": 0.042666666666666665, "grad_norm": 0.012942949309945107, "learning_rate": 4.444444444444444e-06, "loss": 0.3321, "num_tokens": 9714799.0, "reward": 0.8622703552246094, "reward_std": 0.23404854536056519, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.7440719604492188, "rewards/format_reward_step": 0.92578125, "step": 40 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9747274816036224, "aux_distill/mean_u": 0.40832076939570366, "aux_distill/n_active_tok": 133.625, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.5851856301814251, "calib/avg_num_step_conf": 4.17578125, "calib/ece": 0.18477224013948296, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.00819672131147541, "calib/gap": 0.051428750493258124, "calib/mean_conf": 0.31940808772936946, "calib/mu_c": 0.3621951219512195, "calib/mu_w": 0.3107663714579614, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.16807377049180328, "calib/std_conf": 0.20583733841643923, "calib/step_conf_rate": 0.9609375, "calib/step_q_c": 0.412258064516129, "calib/step_q_c_n": 155.0, "calib/step_q_gap": 0.07152944376965276, "calib/step_q_w": 0.34072862074647625, "calib/step_q_w_n": 914.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2628.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 266.53515625, "completions/mean_terminated_length": 267.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.04373333333333333, "grad_norm": 0.012249798513948917, "learning_rate": 4.416666666666667e-06, "loss": 0.3153, "num_tokens": 9890280.0, "reward": 0.8936082124710083, "reward_std": 0.2826644480228424, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.7286226749420166, "rewards/format_reward_step": 0.8984375, "step": 41 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9232034366577864, "aux_distill/mean_u": 0.412800156292371, "aux_distill/n_active_tok": 146.5, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.46169073125291105, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.2671510489795918, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.012244897959183673, "calib/gap": -0.027628815323707534, "calib/mean_conf": 0.33653880408163267, "calib/mu_c": 0.3110526315789473, "calib/mu_w": 0.33868144690265484, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2630694163265306, "calib/std_conf": 0.20534667376184962, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3559259259259259, "calib/step_q_c_n": 54.0, "calib/step_q_gap": 0.0018249268195809631, "calib/step_q_w": 0.35410099910634496, "calib/step_q_w_n": 1119.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2789.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 263.94140625, "completions/mean_terminated_length": 264.97650146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.0448, "grad_norm": 0.012458818033337593, "learning_rate": 4.388888888888889e-06, "loss": 0.4661, "num_tokens": 10062217.0, "reward": 0.8924587965011597, "reward_std": 0.22109654545783997, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.7692925930023193, "rewards/format_reward_step": 0.94140625, "step": 42 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9289751965552568, "aux_distill/mean_u": 0.36524919348664364, "aux_distill/n_active_tok": 143.5, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.510925925925926, "calib/avg_num_step_conf": 4.484375, "calib/ece": 0.23542971887550201, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": -0.01837222222222229, "calib/mean_conf": 0.31618473895582333, "calib/mu_c": 0.2995833333333333, "calib/mu_w": 0.3179555555555556, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22761445783132528, "calib/std_conf": 0.21726067017121417, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3331, "calib/step_q_c_n": 100.0, "calib/step_q_gap": 0.003037022900763353, "calib/step_q_w": 0.33006297709923665, "calib/step_q_w_n": 1048.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 950.0, "completions/max_terminated_length": 950.0, "completions/mean_length": 250.06640625, "completions/mean_terminated_length": 251.0470733642578, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.04586666666666667, "grad_norm": 0.013071301393210888, "learning_rate": 4.361111111111112e-06, "loss": 0.2971, "num_tokens": 10231458.0, "reward": 0.9076724648475647, "reward_std": 0.2054036259651184, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.772376298904419, "rewards/format_reward_step": 0.94921875, "step": 43 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9430578760802746, "aux_distill/mean_u": 0.398295250677591, "aux_distill/n_active_tok": 141.25, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4277208859252203, "calib/avg_num_step_conf": 4.44140625, "calib/ece": 0.2634333333333333, "calib/final_conf_rate": 0.9375, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.016666666666666666, "calib/gap": -0.06060871636103832, "calib/mean_conf": 0.3316, "calib/mu_c": 0.27578947368421053, "calib/mu_w": 0.33639819004524885, "calib/nonempty_final_conf_rate": 0.9375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2579333333333333, "calib/std_conf": 0.20778155195621514, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.28509638554216865, "calib/step_q_c_n": 83.0, "calib/step_q_gap": -0.05889792185821091, "calib/step_q_w": 0.34399430740037956, "calib/step_q_w_n": 1054.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 279.29296875, "completions/mean_terminated_length": 282.6047668457031, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.046933333333333334, "grad_norm": 0.012910122983157635, "learning_rate": 4.333333333333334e-06, "loss": 0.3078, "num_tokens": 10409277.0, "reward": 0.8753119111061096, "reward_std": 0.2265249788761139, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.7506237626075745, "rewards/format_reward_step": 0.92578125, "step": 44 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9212546311318874, "aux_distill/mean_u": 0.35253776884269494, "aux_distill/n_active_tok": 134.0, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.55645390070922, "calib/avg_num_step_conf": 4.1875, "calib/ece": 0.25850800000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": 0.03421134751773047, "calib/mean_conf": 0.318508, "calib/mu_c": 0.35066666666666674, "calib/mu_w": 0.31645531914893626, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.25850800000000007, "calib/std_conf": 0.20830739289809183, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.339375, "calib/step_q_c_n": 48.0, "calib/step_q_gap": 0.009534765624999997, "calib/step_q_w": 0.329840234375, "calib/step_q_w_n": 1024.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3059.0, "completions/max_terminated_length": 3059.0, "completions/mean_length": 263.28125, "completions/mean_terminated_length": 263.28125, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.048, "grad_norm": 0.014113575220108032, "learning_rate": 4.305555555555556e-06, "loss": 0.4027, "num_tokens": 10581725.0, "reward": 0.9087159633636475, "reward_std": 0.17309433221817017, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.8018069267272949, "rewards/format_reward_step": 0.95703125, "step": 45 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9593624100089073, "aux_distill/mean_u": 0.4153745987914725, "aux_distill/n_active_tok": 138.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.46116013308259807, "calib/avg_num_step_conf": 4.32421875, "calib/ece": 0.20393607751023624, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.03989102767475766, "calib/mean_conf": 0.29469985703779533, "calib/mu_c": 0.25967741935483873, "calib/mu_w": 0.2995684470295964, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.18829434522677166, "calib/std_conf": 0.20188818989869645, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3445454545454546, "calib/step_q_c_n": 121.0, "calib/step_q_gap": 0.0017594504886594975, "calib/step_q_w": 0.3427860040567951, "calib/step_q_w_n": 986.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1107.0, "completions/max_terminated_length": 1107.0, "completions/mean_length": 232.80078125, "completions/mean_terminated_length": 233.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.04906666666666667, "grad_norm": 0.01254682894796133, "learning_rate": 4.277777777777778e-06, "loss": 0.2697, "num_tokens": 10746090.0, "reward": 0.9490313529968262, "reward_std": 0.14605683088302612, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.7965002655982971, "rewards/format_reward_step": 0.98046875, "step": 46 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9571955800056458, "aux_distill/mean_u": 0.4023008092076161, "aux_distill/n_active_tok": 122.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.46918103448275855, "calib/avg_num_step_conf": 3.8515625, "calib/ece": 0.28256349206349207, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": -0.007560344827586163, "calib/mean_conf": 0.3319603174603175, "calib/mu_c": 0.325, "calib/mu_w": 0.3325603448275862, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.2675793650793651, "calib/std_conf": 0.22515204389421048, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.3146268656716419, "calib/step_q_c_n": 67.0, "calib/step_q_gap": -0.02809892322933749, "calib/step_q_w": 0.3427257889009794, "calib/step_q_w_n": 919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2447.0, "completions/max_terminated_length": 2447.0, "completions/mean_length": 225.12890625, "completions/mean_terminated_length": 225.12890625, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.050133333333333335, "grad_norm": 0.014441216364502907, "learning_rate": 4.25e-06, "loss": 0.291, "num_tokens": 10909699.0, "reward": 0.8981256484985352, "reward_std": 0.1963968276977539, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.7689076066017151, "rewards/format_reward_step": 0.94921875, "step": 47 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9565908145159483, "aux_distill/mean_u": 0.382051206029253, "aux_distill/n_active_tok": 127.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5426973907150119, "calib/avg_num_step_conf": 4.01953125, "calib/ece": 0.21650036651687815, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.00587946273330664, "calib/mean_conf": 0.3162632123666805, "calib/mu_c": 0.3215384615384616, "calib/mu_w": 0.31565899880515497, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21499839023229317, "calib/std_conf": 0.20934874325681233, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3501052631578947, "calib/step_q_c_n": 95.0, "calib/step_q_gap": 0.015301194635410742, "calib/step_q_w": 0.33480406852248396, "calib/step_q_w_n": 934.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2048.0, "completions/max_terminated_length": 2048.0, "completions/mean_length": 209.16015625, "completions/mean_terminated_length": 209.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.0512, "grad_norm": 0.012829742394387722, "learning_rate": 4.222222222222223e-06, "loss": 0.2875, "num_tokens": 11066932.0, "reward": 0.9423176050186157, "reward_std": 0.1549239456653595, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8026039600372314, "rewards/format_reward_step": 0.98046875, "step": 48 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9549869894981384, "aux_distill/mean_u": 0.40374540819858157, "aux_distill/n_active_tok": 134.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4641228070175439, "calib/avg_num_step_conf": 4.203125, "calib/ece": 0.24660474308300395, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": -0.030000877192982445, "calib/mean_conf": 0.31663636363636366, "calib/mu_c": 0.2896, "calib/mu_w": 0.31960087719298247, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2322134387351779, "calib/std_conf": 0.21880287431412032, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.30495145631067966, "calib/step_q_c_n": 103.0, "calib/step_q_gap": -0.04869808120216723, "calib/step_q_w": 0.3536495375128469, "calib/step_q_w_n": 973.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2319.0, "completions/max_terminated_length": 2319.0, "completions/mean_length": 218.51171875, "completions/mean_terminated_length": 218.51171875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.05226666666666667, "grad_norm": 0.01155038271099329, "learning_rate": 4.194444444444445e-06, "loss": 0.3168, "num_tokens": 11227407.0, "reward": 0.9383452534675598, "reward_std": 0.1467917412519455, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.7985655069351196, "rewards/format_reward_step": 0.98046875, "step": 49 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9325551148504019, "aux_distill/mean_u": 0.39808962162575967, "aux_distill/n_active_tok": 148.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4599811676082862, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.24267716535433073, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.029524482109227868, "calib/mean_conf": 0.31354330708661415, "calib/mu_c": 0.28611111111111115, "calib/mu_w": 0.315635593220339, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24267716535433073, "calib/std_conf": 0.18163313183774915, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.34123287671232877, "calib/step_q_c_n": 73.0, "calib/step_q_gap": -0.00013390964314696774, "calib/step_q_w": 0.34136678635547574, "calib/step_q_w_n": 1114.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2543.0, "completions/max_terminated_length": 2543.0, "completions/mean_length": 244.6875, "completions/mean_terminated_length": 245.64707946777344, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.05333333333333334, "grad_norm": 0.010162976570427418, "learning_rate": 4.166666666666667e-06, "loss": 0.3105, "num_tokens": 11395407.0, "reward": 0.9332138299942017, "reward_std": 0.13436943292617798, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.8195527195930481, "rewards/format_reward_step": 0.9765625, "step": 50 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9404325764626265, "aux_distill/mean_u": 0.35824322088937677, "aux_distill/n_active_tok": 145.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5232451620290851, "calib/avg_num_step_conf": 4.53515625, "calib/ece": 0.2178271653543307, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.023622047244094488, "calib/gap": 0.023285480361845856, "calib/mean_conf": 0.3519366141732283, "calib/mu_c": 0.37146341463414634, "calib/mu_w": 0.3481779342723005, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.20417322834645665, "calib/std_conf": 0.22717483458875004, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.38983695652173905, "calib/step_q_c_n": 184.0, "calib/step_q_gap": 0.004287314761247718, "calib/step_q_w": 0.38554964176049134, "calib/step_q_w_n": 977.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1844.0, "completions/max_terminated_length": 1844.0, "completions/mean_length": 227.8046875, "completions/mean_terminated_length": 227.8046875, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.0544, "grad_norm": 0.012741345912218094, "learning_rate": 4.138888888888889e-06, "loss": 0.3269, "num_tokens": 11563021.0, "reward": 0.9533712863922119, "reward_std": 0.18221133947372437, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.766117513179779, "rewards/format_reward_step": 0.98046875, "step": 51 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9390203971415758, "aux_distill/mean_u": 0.4210420142587068, "aux_distill/n_active_tok": 148.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.547441545238902, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.24308300395256915, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.03881565570992879, "calib/mean_conf": 0.3178656126482214, "calib/mu_c": 0.35269230769230764, "calib/mu_w": 0.31387665198237885, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22909090909090907, "calib/std_conf": 0.21644675865289373, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3586407766990291, "calib/step_q_c_n": 103.0, "calib/step_q_gap": -0.006827857987317776, "calib/step_q_w": 0.3654686346863469, "calib/step_q_w_n": 1084.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 235.1171875, "completions/mean_terminated_length": 235.1171875, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.055466666666666664, "grad_norm": 0.012383176013827324, "learning_rate": 4.111111111111111e-06, "loss": 0.3683, "num_tokens": 11731163.0, "reward": 0.9395182132720947, "reward_std": 0.15534138679504395, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8009113073348999, "rewards/format_reward_step": 0.9765625, "step": 52 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9244867339730263, "aux_distill/mean_u": 0.4068314368270426, "aux_distill/n_active_tok": 151.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5781591074460345, "calib/avg_num_step_conf": 4.73828125, "calib/ece": 0.16415686274509805, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.051749939364540365, "calib/mean_conf": 0.29780392156862745, "calib/mu_c": 0.3418421052631579, "calib/mu_w": 0.2900921658986175, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15647058823529414, "calib/std_conf": 0.20419913575587145, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.37377142857142853, "calib/step_q_c_n": 175.0, "calib/step_q_gap": 0.019372584640792723, "calib/step_q_w": 0.3543988439306358, "calib/step_q_w_n": 1038.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2570.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 237.59765625, "completions/mean_terminated_length": 237.59765625, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.05653333333333333, "grad_norm": 0.011600131168961525, "learning_rate": 4.083333333333334e-06, "loss": 0.2895, "num_tokens": 11897812.0, "reward": 0.9779938459396362, "reward_std": 0.1375225931406021, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8153626918792725, "rewards/format_reward_step": 0.9921875, "step": 53 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.940915510058403, "aux_distill/mean_u": 0.3600278968701587, "aux_distill/n_active_tok": 152.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5066588785046728, "calib/avg_num_step_conf": 4.7578125, "calib/ece": 0.20701181102362204, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.0011144859813084218, "calib/mean_conf": 0.3096889763779528, "calib/mu_c": 0.30875, "calib/mu_w": 0.30986448598130845, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.17961023622047245, "calib/std_conf": 0.19435782031969961, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.4051515151515151, "calib/step_q_c_n": 165.0, "calib/step_q_gap": 0.026610204610204524, "calib/step_q_w": 0.37854131054131057, "calib/step_q_w_n": 1053.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2306.0, "completions/max_terminated_length": 2306.0, "completions/mean_length": 236.3125, "completions/mean_terminated_length": 236.3125, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.0576, "grad_norm": 0.01169079914689064, "learning_rate": 4.055555555555556e-06, "loss": 0.3243, "num_tokens": 12064540.0, "reward": 0.9626379609107971, "reward_std": 0.158560112118721, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.7885571122169495, "rewards/format_reward_step": 0.98046875, "step": 54 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.90636813826859, "aux_distill/mean_u": 0.3753203712973043, "aux_distill/n_active_tok": 168.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5286458333333333, "calib/avg_num_step_conf": 5.28515625, "calib/ece": 0.21897783464566928, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": 0.015135720720720724, "calib/mean_conf": 0.3255211417322835, "calib/mu_c": 0.33875, "calib/mu_w": 0.32361427927927927, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2092573622047244, "calib/std_conf": 0.22133494742193616, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.4003896103896104, "calib/step_q_c_n": 154.0, "calib/step_q_gap": 0.0166956987966162, "calib/step_q_w": 0.3836939115929942, "calib/step_q_w_n": 1199.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 847.0, "completions/max_terminated_length": 847.0, "completions/mean_length": 230.84375, "completions/mean_terminated_length": 232.6614227294922, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.058666666666666666, "grad_norm": 0.011481177993118763, "learning_rate": 4.027777777777779e-06, "loss": 0.2571, "num_tokens": 12231460.0, "reward": 0.9448150396347046, "reward_std": 0.16676904261112213, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.7880675792694092, "rewards/format_reward_step": 0.9765625, "step": 55 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9204953722655773, "aux_distill/mean_u": 0.3976945579537004, "aux_distill/n_active_tok": 161.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5634854771784232, "calib/avg_num_step_conf": 5.0390625, "calib/ece": 0.2430078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01171875, "calib/gap": 0.07690456431535264, "calib/mean_conf": 0.30160156250000003, "calib/mu_c": 0.37399999999999994, "calib/mu_w": 0.2970954356846473, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2430078125, "calib/std_conf": 0.21080058852517133, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.40829787234042547, "calib/step_q_c_n": 94.0, "calib/step_q_gap": 0.02134293923005759, "calib/step_q_w": 0.3869549331103679, "calib/step_q_w_n": 1196.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 851.0, "completions/max_terminated_length": 851.0, "completions/mean_length": 223.953125, "completions/mean_terminated_length": 224.83139038085938, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.05973333333333333, "grad_norm": 0.012053816579282284, "learning_rate": 4.000000000000001e-06, "loss": 0.3112, "num_tokens": 12395632.0, "reward": 0.9471312761306763, "reward_std": 0.11150771379470825, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.8434812426567078, "rewards/format_reward_step": 0.9921875, "step": 56 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9139548428356647, "aux_distill/mean_u": 0.37871803075938426, "aux_distill/n_active_tok": 175.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.447172619047619, "calib/avg_num_step_conf": 5.46875, "calib/ece": 0.22818503937007875, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023622047244094488, "calib/gap": -0.04487946428571421, "calib/mean_conf": 0.2985787401574803, "calib/mu_c": 0.259, "calib/mu_w": 0.3038794642857142, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2043267716535433, "calib/std_conf": 0.23301049218710934, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3707453416149068, "calib/step_q_c_n": 161.0, "calib/step_q_gap": -0.0028575639541004727, "calib/step_q_w": 0.37360290556900727, "calib/step_q_w_n": 1239.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2270.0, "completions/max_terminated_length": 2270.0, "completions/mean_length": 255.6953125, "completions/mean_terminated_length": 255.6953125, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.0608, "grad_norm": 0.01160177867859602, "learning_rate": 3.972222222222223e-06, "loss": 0.3035, "num_tokens": 12567882.0, "reward": 0.9475748538970947, "reward_std": 0.14415797591209412, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.7896810173988342, "rewards/format_reward_step": 0.98828125, "step": 57 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9138444364070892, "aux_distill/mean_u": 0.35042719995743726, "aux_distill/n_active_tok": 160.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.46266721266721267, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.2456862745098039, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": -0.035843570843570816, "calib/mean_conf": 0.3475686274509804, "calib/mu_c": 0.3163636363636364, "calib/mu_w": 0.3522072072072072, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23192156862745097, "calib/std_conf": 0.22922967803116806, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.39843537414965985, "calib/step_q_c_n": 147.0, "calib/step_q_gap": 0.009971809619370164, "calib/step_q_w": 0.3884635645302897, "calib/step_q_w_n": 1139.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2465.0, "completions/max_terminated_length": 2465.0, "completions/mean_length": 226.9453125, "completions/mean_terminated_length": 226.9453125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.06186666666666667, "grad_norm": 0.015932245180010796, "learning_rate": 3.944444444444445e-06, "loss": 0.3364, "num_tokens": 12732300.0, "reward": 0.9416613578796387, "reward_std": 0.16059020161628723, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.7700413465499878, "rewards/format_reward_step": 0.984375, "step": 58 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8896596431732178, "aux_distill/mean_u": 0.3602237890788429, "aux_distill/n_active_tok": 180.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.44578582606751616, "calib/avg_num_step_conf": 5.65234375, "calib/ece": 0.20599607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": -0.04110529845741112, "calib/mean_conf": 0.31885882352941175, "calib/mu_c": 0.2845238095238095, "calib/mu_w": 0.32562910798122063, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18007450980392156, "calib/std_conf": 0.2146131304358581, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.36808823529411766, "calib/step_q_c_n": 204.0, "calib/step_q_gap": -0.018032440490274915, "calib/step_q_w": 0.3861206757843926, "calib/step_q_w_n": 1243.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2643.0, "completions/max_terminated_length": 2643.0, "completions/mean_length": 250.51953125, "completions/mean_terminated_length": 250.51953125, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.06293333333333333, "grad_norm": 0.01418408565223217, "learning_rate": 3.916666666666667e-06, "loss": 0.3289, "num_tokens": 12902681.0, "reward": 0.9583262205123901, "reward_std": 0.15988890826702118, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.7682148814201355, "rewards/format_reward_step": 0.984375, "step": 59 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.922422407194972, "aux_distill/mean_u": 0.35424350929867404, "aux_distill/n_active_tok": 170.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5448898265353961, "calib/avg_num_step_conf": 5.578125, "calib/ece": 0.2269411764705882, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.043080168776371386, "calib/mean_conf": 0.27996078431372545, "calib/mu_c": 0.32000000000000006, "calib/mu_w": 0.2769198312236287, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21815686274509802, "calib/std_conf": 0.21641734923606573, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3780232558139534, "calib/step_q_c_n": 86.0, "calib/step_q_gap": 0.022562003951062226, "calib/step_q_w": 0.3554612518628912, "calib/step_q_w_n": 1342.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1250.0, "completions/max_terminated_length": 1250.0, "completions/mean_length": 225.65625, "completions/mean_terminated_length": 226.5411834716797, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.064, "grad_norm": 0.01299036294221878, "learning_rate": 3.88888888888889e-06, "loss": 0.2561, "num_tokens": 13069305.0, "reward": 0.9524890780448914, "reward_std": 0.11521777510643005, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.8424781560897827, "rewards/format_reward_step": 0.9921875, "step": 60 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.919845612719655, "aux_distill/mean_u": 0.34344900348094065, "aux_distill/n_active_tok": 170.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4713050314465409, "calib/avg_num_step_conf": 5.33984375, "calib/ece": 0.1759685039370079, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.021732704402515735, "calib/mean_conf": 0.2464724409448819, "calib/mu_c": 0.22833333333333333, "calib/mu_w": 0.25006603773584907, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12854330708661418, "calib/std_conf": 0.1897106718249609, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3376363636363636, "calib/step_q_c_n": 220.0, "calib/step_q_gap": 0.013825552825552812, "calib/step_q_w": 0.3238108108108108, "calib/step_q_w_n": 1147.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 770.0, "completions/max_terminated_length": 770.0, "completions/mean_length": 221.8515625, "completions/mean_terminated_length": 222.72158813476562, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.06506666666666666, "grad_norm": 0.016961747780442238, "learning_rate": 3.861111111111112e-06, "loss": 0.2661, "num_tokens": 13230163.0, "reward": 0.9816569089889526, "reward_std": 0.11320608109235764, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8070638179779053, "rewards/format_reward_step": 0.9921875, "step": 61 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8867218066006899, "aux_distill/mean_u": 0.3593741841944759, "aux_distill/n_active_tok": 173.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5004852013585638, "calib/avg_num_step_conf": 5.4296875, "calib/ece": 0.18423046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": -0.011993692382338661, "calib/mean_conf": 0.26183984375, "calib/mu_c": 0.2511111111111111, "calib/mu_w": 0.2631048034934498, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17030078125, "calib/std_conf": 0.19503744065236703, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3344662162162162, "calib/step_q_c_n": 148.0, "calib/step_q_gap": -0.0010531074552813657, "calib/step_q_w": 0.33551932367149756, "calib/step_q_w_n": 1242.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 748.0, "completions/max_terminated_length": 748.0, "completions/mean_length": 231.8046875, "completions/mean_terminated_length": 232.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.06613333333333334, "grad_norm": 0.015427176840603352, "learning_rate": 3.833333333333334e-06, "loss": 0.2946, "num_tokens": 13396585.0, "reward": 0.9731844663619995, "reward_std": 0.09732332825660706, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.8409003019332886, "rewards/format_reward_step": 1.0, "step": 62 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9000698383897543, "aux_distill/mean_u": 0.38631773311322914, "aux_distill/n_active_tok": 186.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4998644619137978, "calib/avg_num_step_conf": 5.828125, "calib/ece": 0.15207171314741033, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.013889943074003763, "calib/mean_conf": 0.2455378486055777, "calib/mu_c": 0.2335294117647059, "calib/mu_w": 0.24741935483870967, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13107569721115536, "calib/std_conf": 0.20110454348946114, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.313854748603352, "calib/step_q_c_n": 179.0, "calib/step_q_gap": -0.011379828700532224, "calib/step_q_w": 0.3252345773038842, "calib/step_q_w_n": 1313.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2491.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 255.2421875, "completions/mean_terminated_length": 257.251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.0672, "grad_norm": 0.012757709249854088, "learning_rate": 3.8055555555555556e-06, "loss": 0.2502, "num_tokens": 13570567.0, "reward": 0.9583083391189575, "reward_std": 0.15712447464466095, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8072417974472046, "rewards/format_reward_step": 0.9765625, "step": 63 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8718363661319017, "aux_distill/mean_u": 0.34136569305581255, "aux_distill/n_active_tok": 196.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4279731070208311, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.1686771653543307, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.04626562327785741, "calib/mean_conf": 0.2149448818897638, "calib/mu_c": 0.17651162790697672, "calib/mu_w": 0.22277725118483413, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.10716535433070866, "calib/std_conf": 0.18758081962484274, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2654395161290322, "calib/step_q_c_n": 248.0, "calib/step_q_gap": -0.04765753156211089, "calib/step_q_w": 0.3130970476911431, "calib/step_q_w_n": 1321.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1838.0, "completions/max_terminated_length": 1838.0, "completions/mean_length": 246.79296875, "completions/mean_terminated_length": 247.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.06826666666666667, "grad_norm": 0.012563909403979778, "learning_rate": 3.777777777777778e-06, "loss": 0.256, "num_tokens": 13737522.0, "reward": 0.9775606989860535, "reward_std": 0.10948432981967926, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.7988713979721069, "rewards/format_reward_step": 0.98828125, "step": 64 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8879748061299324, "aux_distill/mean_u": 0.3467711585862075, "aux_distill/n_active_tok": 175.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4754679144385026, "calib/avg_num_step_conf": 5.53515625, "calib/ece": 0.1719724409448819, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.030123796791443863, "calib/mean_conf": 0.2243267716535433, "calib/mu_c": 0.19823529411764707, "calib/mu_w": 0.22835909090909093, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1312204724409449, "calib/std_conf": 0.20970964064097472, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.30329139072847683, "calib/step_q_c_n": 151.0, "calib/step_q_gap": -0.01926469142002235, "calib/step_q_w": 0.3225560821484992, "calib/step_q_w_n": 1266.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 693.0, "completions/max_terminated_length": 693.0, "completions/mean_length": 222.6328125, "completions/mean_terminated_length": 223.50588989257812, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.06933333333333333, "grad_norm": 0.013984648510813713, "learning_rate": 3.7500000000000005e-06, "loss": 0.2588, "num_tokens": 13899540.0, "reward": 0.9684619903564453, "reward_std": 0.11982069909572601, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8158302307128906, "rewards/format_reward_step": 0.98828125, "step": 65 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8976942636072636, "aux_distill/mean_u": 0.3792002791541046, "aux_distill/n_active_tok": 193.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.3524753792919883, "calib/avg_num_step_conf": 6.03125, "calib/ece": 0.17583921568627453, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.08709728506787331, "calib/mean_conf": 0.21051372549019606, "calib/mu_c": 0.1350294117647059, "calib/mu_w": 0.2221266968325792, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12650980392156866, "calib/std_conf": 0.18832027060679057, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3032251308900524, "calib/step_q_c_n": 191.0, "calib/step_q_gap": 0.0042280872832526906, "calib/step_q_w": 0.2989970436067997, "calib/step_q_w_n": 1353.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 249.69140625, "completions/mean_terminated_length": 249.69140625, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.0704, "grad_norm": 0.024497196078300476, "learning_rate": 3.7222222222222225e-06, "loss": 0.321, "num_tokens": 14069813.0, "reward": 0.9704102873802185, "reward_std": 0.09235270321369171, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.815820574760437, "rewards/format_reward_step": 0.9921875, "step": 66 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8928143437951803, "aux_distill/mean_u": 0.3683333400300812, "aux_distill/n_active_tok": 201.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5283459595959596, "calib/avg_num_step_conf": 6.3515625, "calib/ece": 0.11288671875000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.017947979797979796, "calib/mean_conf": 0.18846484375, "calib/mu_c": 0.2038888888888889, "calib/mu_w": 0.1859409090909091, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08036328125, "calib/std_conf": 0.15787680229070414, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.31103092783505154, "calib/step_q_c_n": 194.0, "calib/step_q_gap": 0.002063050740079453, "calib/step_q_w": 0.3089678770949721, "calib/step_q_w_n": 1432.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 818.0, "completions/max_terminated_length": 818.0, "completions/mean_length": 259.2109375, "completions/mean_terminated_length": 260.22747802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.07146666666666666, "grad_norm": 0.01422516256570816, "learning_rate": 3.694444444444445e-06, "loss": 0.2683, "num_tokens": 14241179.0, "reward": 0.9945505857467651, "reward_std": 0.07644922286272049, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.852382481098175, "rewards/format_reward_step": 0.99609375, "step": 67 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8721083607524633, "aux_distill/mean_u": 0.33766640953665167, "aux_distill/n_active_tok": 196.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47830636160714285, "calib/avg_num_step_conf": 6.2578125, "calib/ece": 0.14378906249999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.0017410714285714113, "calib/mean_conf": 0.1987890625, "calib/mu_c": 0.2003125, "calib/mu_w": 0.1985714285714286, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10878906249999999, "calib/std_conf": 0.15857427047087777, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.282723880597015, "calib/step_q_c_n": 134.0, "calib/step_q_gap": 0.010048131278213857, "calib/step_q_w": 0.2726757493188011, "calib/step_q_w_n": 1468.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 833.0, "completions/max_terminated_length": 833.0, "completions/mean_length": 244.5546875, "completions/mean_terminated_length": 245.51373291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.07253333333333334, "grad_norm": 0.017560146749019623, "learning_rate": 3.6666666666666666e-06, "loss": 0.2459, "num_tokens": 14407873.0, "reward": 0.9854179620742798, "reward_std": 0.08683042228221893, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.8536484241485596, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8708281461149454, "aux_distill/mean_u": 0.369939081804995, "aux_distill/n_active_tok": 212.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5502949852507375, "calib/avg_num_step_conf": 6.65234375, "calib/ece": 0.10876953124999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.056848082595870236, "calib/mean_conf": 0.18248046875, "calib/mu_c": 0.2326666666666667, "calib/mu_w": 0.17581858407079645, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08703124999999998, "calib/std_conf": 0.14876641062763554, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2950264550264551, "calib/step_q_c_n": 189.0, "calib/step_q_gap": 0.010110338778106298, "calib/step_q_w": 0.2849161162483488, "calib/step_q_w_n": 1514.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1226.0, "completions/max_terminated_length": 1226.0, "completions/mean_length": 275.57421875, "completions/mean_terminated_length": 276.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.0736, "grad_norm": 0.022750258445739746, "learning_rate": 3.638888888888889e-06, "loss": 0.2769, "num_tokens": 14582916.0, "reward": 0.995856761932373, "reward_std": 0.0744517594575882, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.8784323334693909, "rewards/format_reward_step": 0.99609375, "step": 69 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8883331622928381, "aux_distill/mean_u": 0.35891217924730157, "aux_distill/n_active_tok": 208.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5104230533415083, "calib/avg_num_step_conf": 7.11328125, "calib/ece": 0.11444881889763778, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.027016145513999584, "calib/mean_conf": 0.1575984251968504, "calib/mu_c": 0.1823809523809524, "calib/mu_w": 0.1553648068669528, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09468503937007872, "calib/std_conf": 0.1397020879706054, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.27740740740740744, "calib/step_q_c_n": 135.0, "calib/step_q_gap": 0.03019922235402664, "calib/step_q_w": 0.2472081850533808, "calib/step_q_w_n": 1686.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 790.0, "completions/max_terminated_length": 790.0, "completions/mean_length": 264.296875, "completions/mean_terminated_length": 266.3779602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.07466666666666667, "grad_norm": 0.0257136020809412, "learning_rate": 3.6111111111111115e-06, "loss": 0.2185, "num_tokens": 14757568.0, "reward": 0.9734380841255188, "reward_std": 0.09189367294311523, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.8843761682510376, "rewards/format_reward_step": 0.98046875, "step": 70 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8784806150943041, "aux_distill/mean_u": 0.3529545415400276, "aux_distill/n_active_tok": 216.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4949299855142444, "calib/avg_num_step_conf": 6.98828125, "calib/ece": 0.1296484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005101400289715119, "calib/mean_conf": 0.1714453125, "calib/mu_c": 0.17578947368421052, "calib/mu_w": 0.1706880733944954, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.076328125, "calib/std_conf": 0.16355407212685763, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3313185344827586, "calib/step_q_c_n": 232.0, "calib/step_q_gap": 0.03309554154762695, "calib/step_q_w": 0.29822299293513166, "calib/step_q_w_n": 1557.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1264.0, "completions/max_terminated_length": 1264.0, "completions/mean_length": 280.64453125, "completions/mean_terminated_length": 281.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.07573333333333333, "grad_norm": 0.03355726599693298, "learning_rate": 3.5833333333333335e-06, "loss": 0.2439, "num_tokens": 14933821.0, "reward": 0.9902146458625793, "reward_std": 0.09100230038166046, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8398042321205139, "rewards/format_reward_step": 0.9921875, "step": 71 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8864923529326916, "aux_distill/mean_u": 0.3814361559322445, "aux_distill/n_active_tok": 249.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5487861811391224, "calib/avg_num_step_conf": 7.86328125, "calib/ece": 0.09687890625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.007557889822595698, "calib/mean_conf": 0.15241796875, "calib/mu_c": 0.15944444444444447, "calib/mu_w": 0.15188655462184877, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0894921875, "calib/std_conf": 0.13825932774454686, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21701754385964916, "calib/step_q_c_n": 114.0, "calib/step_q_gap": -0.0705435409218148, "calib/step_q_w": 0.28756108478146397, "calib/step_q_w_n": 1899.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 975.0, "completions/max_terminated_length": 975.0, "completions/mean_length": 289.875, "completions/mean_terminated_length": 291.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.0768, "grad_norm": 0.03782118856906891, "learning_rate": 3.555555555555556e-06, "loss": 0.289, "num_tokens": 15112437.0, "reward": 0.9824103116989136, "reward_std": 0.07157760858535767, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.9023206830024719, "rewards/format_reward_step": 0.9921875, "step": 72 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.881175821647048, "aux_distill/mean_u": 0.3102595000996086, "aux_distill/n_active_tok": 232.375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5427479990298326, "calib/avg_num_step_conf": 7.265625, "calib/ece": 0.1055254901960784, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014407955372301717, "calib/mean_conf": 0.14905490196078433, "calib/mu_c": 0.1613157894736842, "calib/mu_w": 0.14690783410138247, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05278039215686275, "calib/std_conf": 0.15025799031146853, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3429217391304348, "calib/step_q_c_n": 230.0, "calib/step_q_gap": 0.04303124833288874, "calib/step_q_w": 0.29989049079754604, "calib/step_q_w_n": 1630.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1199.0, "completions/max_terminated_length": 1199.0, "completions/mean_length": 292.578125, "completions/mean_terminated_length": 293.7254943847656, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.07786666666666667, "grad_norm": 0.029115431010723114, "learning_rate": 3.5277777777777784e-06, "loss": 0.246, "num_tokens": 15294369.0, "reward": 0.9939548969268799, "reward_std": 0.08326876163482666, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.8472848534584045, "rewards/format_reward_step": 0.9921875, "step": 73 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8466088864952326, "aux_distill/mean_u": 0.3597551678076145, "aux_distill/n_active_tok": 258.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48332210242587603, "calib/avg_num_step_conf": 8.84765625, "calib/ece": 0.11814960629921259, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013522012578616738, "calib/mean_conf": 0.15720472440944885, "calib/mu_c": 0.15833333333333335, "calib/mu_w": 0.15698113207547168, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05499999999999999, "calib/std_conf": 0.139694765038833, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21719741100323625, "calib/step_q_c_n": 309.0, "calib/step_q_gap": -0.06678162785156944, "calib/step_q_w": 0.2839790388548057, "calib/step_q_w_n": 1956.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1924.0, "completions/max_terminated_length": 1924.0, "completions/mean_length": 295.48828125, "completions/mean_terminated_length": 297.8149719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.07893333333333333, "grad_norm": 0.016858849674463272, "learning_rate": 3.5e-06, "loss": 0.2195, "num_tokens": 15473942.0, "reward": 0.992323637008667, "reward_std": 0.08323856443166733, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.832303524017334, "rewards/format_reward_step": 0.98828125, "step": 74 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8464425075799227, "aux_distill/mean_u": 0.3196149480640056, "aux_distill/n_active_tok": 260.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5824703344120821, "calib/avg_num_step_conf": 9.08984375, "calib/ece": 0.07495219123505976, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.05759708737864086, "calib/mean_conf": 0.14472908366533863, "calib/mu_c": 0.19200000000000006, "calib/mu_w": 0.1344029126213592, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02019920318725099, "calib/std_conf": 0.13802181791973867, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3805806451612903, "calib/step_q_c_n": 310.0, "calib/step_q_gap": 0.11913245477953521, "calib/step_q_w": 0.2614481903817551, "calib/step_q_w_n": 2017.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2202.0, "completions/max_terminated_length": 2202.0, "completions/mean_length": 295.87890625, "completions/mean_terminated_length": 299.3873596191406, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.08, "grad_norm": 0.0117812380194664, "learning_rate": 3.4722222222222224e-06, "loss": 0.1928, "num_tokens": 15654439.0, "reward": 0.9965642094612122, "reward_std": 0.11236327886581421, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8329721689224243, "rewards/format_reward_step": 0.98046875, "step": 75 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8963596131652594, "aux_distill/mean_u": 0.37019422318096695, "aux_distill/n_active_tok": 243.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5180759803921569, "calib/avg_num_step_conf": 8.1953125, "calib/ece": 0.1313095238095238, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014779411764705846, "calib/mean_conf": 0.12511904761904763, "calib/mu_c": 0.1370833333333333, "calib/mu_w": 0.12230392156862746, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03297619047619047, "calib/std_conf": 0.1267409623679342, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33612712418300655, "calib/step_q_c_n": 306.0, "calib/step_q_gap": 0.042306811683006595, "calib/step_q_w": 0.29382031249999996, "calib/step_q_w_n": 1792.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2717.0, "completions/max_terminated_length": 2717.0, "completions/mean_length": 293.77734375, "completions/mean_terminated_length": 297.2608947753906, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.08106666666666666, "grad_norm": 0.010141227394342422, "learning_rate": 3.444444444444445e-06, "loss": 0.2049, "num_tokens": 15832702.0, "reward": 0.9905637502670288, "reward_std": 0.10064421594142914, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8131588697433472, "rewards/format_reward_step": 0.98046875, "step": 76 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.870359031483531, "aux_distill/mean_u": 0.34247186531231555, "aux_distill/n_active_tok": 248.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.43320664414414417, "calib/avg_num_step_conf": 7.76171875, "calib/ece": 0.12236614173228347, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.03174239864864864, "calib/mean_conf": 0.14464960629921259, "calib/mu_c": 0.11690625000000002, "calib/mu_w": 0.14864864864864866, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07051574803149607, "calib/std_conf": 0.14479675974863274, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2990127659574468, "calib/step_q_c_n": 235.0, "calib/step_q_gap": -0.0011852934032837714, "calib/step_q_w": 0.3001980593607306, "calib/step_q_w_n": 1752.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2666.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 299.01953125, "completions/mean_terminated_length": 299.01953125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.08213333333333334, "grad_norm": 0.016655778512358665, "learning_rate": 3.416666666666667e-06, "loss": 0.269, "num_tokens": 16013915.0, "reward": 0.9746717214584351, "reward_std": 0.10004068166017532, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.8438748121261597, "rewards/format_reward_step": 0.98046875, "step": 77 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8313043918460608, "aux_distill/mean_u": 0.3717599451626029, "aux_distill/n_active_tok": 313.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.37681159420289856, "calib/avg_num_step_conf": 10.234375, "calib/ece": 0.16553174603174603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.03759420289855073, "calib/mean_conf": 0.14176984126984127, "calib/mu_c": 0.1108888888888889, "calib/mu_w": 0.14848309178743962, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06436507936507935, "calib/std_conf": 0.13602822201914236, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28372781065088754, "calib/step_q_c_n": 338.0, "calib/step_q_gap": -0.019563162179962623, "calib/step_q_w": 0.30329097283085016, "calib/step_q_w_n": 2282.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 362.7734375, "completions/mean_terminated_length": 365.6299133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.0832, "grad_norm": 0.019840573891997337, "learning_rate": 3.3888888888888893e-06, "loss": 0.3363, "num_tokens": 16214809.0, "reward": 0.9848675727844238, "reward_std": 0.09170843660831451, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8095788955688477, "rewards/format_reward_step": 0.984375, "step": 78 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8580750748515129, "aux_distill/mean_u": 0.3492695963216024, "aux_distill/n_active_tok": 288.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5498194538478899, "calib/avg_num_step_conf": 9.7421875, "calib/ece": 0.09699604743083001, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021831415030467177, "calib/mean_conf": 0.1082213438735178, "calib/mu_c": 0.12642857142857145, "calib/mu_w": 0.10459715639810427, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.019604743083003952, "calib/std_conf": 0.11020820405074769, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.4059787581699347, "calib/step_q_c_n": 306.0, "calib/step_q_gap": 0.10781157352642462, "calib/step_q_w": 0.29816718464351005, "calib/step_q_w_n": 2188.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 323.7109375, "completions/mean_terminated_length": 327.5494079589844, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.08426666666666667, "grad_norm": 0.012313942424952984, "learning_rate": 3.3611111111111117e-06, "loss": 0.1974, "num_tokens": 16404055.0, "reward": 0.9972343444824219, "reward_std": 0.07284507155418396, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8421249985694885, "rewards/format_reward_step": 0.98828125, "step": 79 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9100547656416893, "aux_distill/mean_u": 0.3595352341716097, "aux_distill/n_active_tok": 270.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5754641909814323, "calib/avg_num_step_conf": 8.84375, "calib/ece": 0.14763636363636362, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025727320954907165, "calib/mean_conf": 0.10706719367588934, "calib/mu_c": 0.12689655172413794, "calib/mu_w": 0.10116923076923078, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012727272727272724, "calib/std_conf": 0.11064817757389835, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.36373684210526314, "calib/step_q_c_n": 380.0, "calib/step_q_gap": 0.03152336015197221, "calib/step_q_w": 0.3322134819532909, "calib/step_q_w_n": 1884.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2735.0, "completions/max_terminated_length": 2735.0, "completions/mean_length": 320.18359375, "completions/mean_terminated_length": 322.7047119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.08533333333333333, "grad_norm": 0.01683088205754757, "learning_rate": 3.3333333333333333e-06, "loss": 0.3057, "num_tokens": 16588182.0, "reward": 1.0053168535232544, "reward_std": 0.08265817165374756, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7957901358604431, "rewards/format_reward_step": 0.98828125, "step": 80 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8782324083149433, "aux_distill/mean_u": 0.3618072356264667, "aux_distill/n_active_tok": 303.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5356892523364487, "calib/avg_num_step_conf": 9.46875, "calib/ece": 0.09224606299212595, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005579439252336435, "calib/mean_conf": 0.09654921259842521, "calib/mu_c": 0.10124999999999999, "calib/mu_w": 0.09567056074766356, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.015657480314960632, "calib/std_conf": 0.10259747880362516, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3253183520599251, "calib/step_q_c_n": 267.0, "calib/step_q_gap": 0.032672686784078975, "calib/step_q_w": 0.2926456652758461, "calib/step_q_w_n": 2157.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 353.26953125, "completions/mean_terminated_length": 353.26953125, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.0864, "grad_norm": 0.01895328424870968, "learning_rate": 3.3055555555555558e-06, "loss": 0.3637, "num_tokens": 16784867.0, "reward": 0.9981613159179688, "reward_std": 0.05369744449853897, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.847885251045227, "rewards/format_reward_step": 0.9921875, "step": 81 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8312947601079941, "aux_distill/mean_u": 0.3230309421148612, "aux_distill/n_active_tok": 321.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5915977961432507, "calib/avg_num_step_conf": 10.36328125, "calib/ece": 0.07542687747035573, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03115303030303028, "calib/mean_conf": 0.09139525691699607, "calib/mu_c": 0.11848484848484847, "calib/mu_w": 0.08733181818181819, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.018193675889328062, "calib/std_conf": 0.08681989158217782, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30495833333333333, "calib/step_q_c_n": 264.0, "calib/step_q_gap": 0.015112540114413264, "calib/step_q_w": 0.28984579321892007, "calib/step_q_w_n": 2389.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2439.0, "completions/max_terminated_length": 2439.0, "completions/mean_length": 374.703125, "completions/mean_terminated_length": 377.6535339355469, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.08746666666666666, "grad_norm": 0.023921169340610504, "learning_rate": 3.277777777777778e-06, "loss": 0.2663, "num_tokens": 16986343.0, "reward": 0.9957023859024048, "reward_std": 0.0568794310092926, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.8742173910140991, "rewards/format_reward_step": 0.98828125, "step": 82 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8398945815861225, "aux_distill/mean_u": 0.32894001525501954, "aux_distill/n_active_tok": 325.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5557356887298748, "calib/avg_num_step_conf": 11.515625, "calib/ece": 0.11826693227091632, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014943537567084025, "calib/mean_conf": 0.08715139442231076, "calib/mu_c": 0.0995348837209302, "calib/mu_w": 0.08459134615384617, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01705179282868526, "calib/std_conf": 0.09748061911802404, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27858310626703, "calib/step_q_c_n": 367.0, "calib/step_q_gap": 0.006843857913678586, "calib/step_q_w": 0.2717392483533514, "calib/step_q_w_n": 2581.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 363.55078125, "completions/mean_terminated_length": 369.3214416503906, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.08853333333333334, "grad_norm": 0.012897885404527187, "learning_rate": 3.2500000000000002e-06, "loss": 0.269, "num_tokens": 17186676.0, "reward": 0.9851524233818054, "reward_std": 0.09537991881370544, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8257735967636108, "rewards/format_reward_step": 0.9765625, "step": 83 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8542652856558561, "aux_distill/mean_u": 0.3600303893154187, "aux_distill/n_active_tok": 357.25, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.45500782472613455, "calib/avg_num_step_conf": 12.5, "calib/ece": 0.09252610441767069, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.017144757433489824, "calib/mean_conf": 0.08827710843373494, "calib/mu_c": 0.07361111111111111, "calib/mu_w": 0.09075586854460094, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.018112449799196788, "calib/std_conf": 0.08681147647554294, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3660130718954248, "calib/step_q_c_n": 306.0, "calib/step_q_gap": 0.10061362476342756, "calib/step_q_w": 0.26539944713199726, "calib/step_q_w_n": 2894.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3023.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 395.32421875, "completions/mean_terminated_length": 403.19921875, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0896, "grad_norm": 0.012995940633118153, "learning_rate": 3.2222222222222227e-06, "loss": 0.2734, "num_tokens": 17393799.0, "reward": 0.9755528569221497, "reward_std": 0.09832397103309631, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8378244638442993, "rewards/format_reward_step": 0.97265625, "step": 84 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8209226354956627, "aux_distill/mean_u": 0.3484133899235399, "aux_distill/n_active_tok": 341.375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4757109557109557, "calib/avg_num_step_conf": 11.7265625, "calib/ece": 0.17984, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01892773892773894, "calib/mean_conf": 0.0864, "calib/mu_c": 0.07163636363636364, "calib/mu_w": 0.09056410256410258, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02312, "calib/std_conf": 0.10659756094770649, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3235185185185185, "calib/step_q_c_n": 432.0, "calib/step_q_gap": 0.08762163135898537, "calib/step_q_w": 0.2358968871595331, "calib/step_q_w_n": 2570.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2845.0, "completions/max_terminated_length": 2845.0, "completions/mean_length": 378.8984375, "completions/mean_terminated_length": 384.9127197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.09066666666666667, "grad_norm": 0.020151106640696526, "learning_rate": 3.1944444444444443e-06, "loss": 0.2458, "num_tokens": 17598621.0, "reward": 0.9827597141265869, "reward_std": 0.09446272999048233, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.7741132974624634, "rewards/format_reward_step": 0.9765625, "step": 85 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8645725585520267, "aux_distill/mean_u": 0.3442900409885884, "aux_distill/n_active_tok": 297.375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.43505874880914575, "calib/avg_num_step_conf": 12.54296875, "calib/ece": 0.1632258064516129, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.022392293849899464, "calib/mean_conf": 0.06346774193548388, "calib/mu_c": 0.0453191489361702, "calib/mu_w": 0.06771144278606966, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01858870967741936, "calib/std_conf": 0.09685826321468737, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29846846846846853, "calib/step_q_c_n": 333.0, "calib/step_q_gap": 0.06393997646012942, "calib/step_q_w": 0.2345284920083391, "calib/step_q_w_n": 2878.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 1411.0, "completions/max_terminated_length": 1411.0, "completions/mean_length": 330.70703125, "completions/mean_terminated_length": 341.375, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.09173333333333333, "grad_norm": 0.026438122615218163, "learning_rate": 3.1666666666666667e-06, "loss": 0.2352, "num_tokens": 17788794.0, "reward": 0.966668963432312, "reward_std": 0.10759761929512024, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.7849003672599792, "rewards/format_reward_step": 0.96484375, "step": 86 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8219904322177172, "aux_distill/mean_u": 0.3128133960194839, "aux_distill/n_active_tok": 360.375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.546790994623656, "calib/avg_num_step_conf": 12.671875, "calib/ece": 0.20407999999999998, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01063340053763441, "calib/mean_conf": 0.059120000000000006, "calib/mu_c": 0.06703125000000001, "calib/mu_w": 0.056397849462365604, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0036000000000000003, "calib/std_conf": 0.06984, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2833517495395948, "calib/step_q_c_n": 543.0, "calib/step_q_gap": 0.020853453233189767, "calib/step_q_w": 0.26249829630640503, "calib/step_q_w_n": 2701.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2720.0, "completions/max_terminated_length": 2720.0, "completions/mean_length": 392.44921875, "completions/mean_terminated_length": 400.2669372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.0928, "grad_norm": 0.02165599912405014, "learning_rate": 3.138888888888889e-06, "loss": 0.3066, "num_tokens": 17994757.0, "reward": 0.9853259921073914, "reward_std": 0.09768631309270859, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7479957342147827, "rewards/format_reward_step": 0.97265625, "step": 87 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8673255424946547, "aux_distill/mean_u": 0.3248154467273917, "aux_distill/n_active_tok": 315.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.507420091324201, "calib/avg_num_step_conf": 10.01171875, "calib/ece": 0.11343529411764706, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005678462709284626, "calib/mean_conf": 0.056682352941176475, "calib/mu_c": 0.05180555555555556, "calib/mu_w": 0.05748401826484019, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.014470588235294117, "calib/std_conf": 0.08278811297140205, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28467619047619047, "calib/step_q_c_n": 315.0, "calib/step_q_gap": -0.005169005253346903, "calib/step_q_w": 0.2898451957295374, "calib/step_q_w_n": 2248.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1366.0, "completions/max_terminated_length": 1366.0, "completions/mean_length": 372.55078125, "completions/mean_terminated_length": 374.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.09386666666666667, "grad_norm": 0.011959103867411613, "learning_rate": 3.1111111111111116e-06, "loss": 0.28, "num_tokens": 18199978.0, "reward": 0.9983651638031006, "reward_std": 0.03053853288292885, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.860011637210846, "rewards/format_reward_step": 0.99609375, "step": 88 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8707714267075062, "aux_distill/mean_u": 0.35302710012356325, "aux_distill/n_active_tok": 327.625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6254841649578493, "calib/avg_num_step_conf": 10.9140625, "calib/ece": 0.13034661354581673, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018764411027568914, "calib/mean_conf": 0.03961354581673306, "calib/mu_c": 0.05523809523809523, "calib/mu_w": 0.03647368421052632, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0013147410358565737, "calib/std_conf": 0.047545805716298926, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2900466216216216, "calib/step_q_c_n": 296.0, "calib/step_q_gap": 0.029466957890636802, "calib/step_q_w": 0.2605796637309848, "calib/step_q_w_n": 2498.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3048.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 390.97265625, "completions/mean_terminated_length": 395.60870361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.09493333333333333, "grad_norm": 0.012375755235552788, "learning_rate": 3.0833333333333336e-06, "loss": 0.2737, "num_tokens": 18408955.0, "reward": 0.9838107824325562, "reward_std": 0.06585811078548431, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8269965648651123, "rewards/format_reward_step": 0.9765625, "step": 89 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8781138341873884, "aux_distill/mean_u": 0.35365546245273183, "aux_distill/n_active_tok": 309.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5553408480944713, "calib/avg_num_step_conf": 10.75390625, "calib/ece": 0.15353174603174605, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0037584541062802013, "calib/mean_conf": 0.03424603174603174, "calib/mu_c": 0.037333333333333336, "calib/mu_w": 0.033574879227053135, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004603174603174601, "calib/std_conf": 0.045292556329447065, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2954569732937686, "calib/step_q_c_n": 337.0, "calib/step_q_gap": 0.03972734261082156, "calib/step_q_w": 0.255729630682947, "calib/step_q_w_n": 2416.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 347.81640625, "completions/mean_terminated_length": 351.94073486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.096, "grad_norm": 0.01080415491014719, "learning_rate": 3.055555555555556e-06, "loss": 0.3039, "num_tokens": 18601316.0, "reward": 0.9893505573272705, "reward_std": 0.05818493664264679, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.818544864654541, "rewards/format_reward_step": 0.984375, "step": 90 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8643928486853838, "aux_distill/mean_u": 0.3049086768321826, "aux_distill/n_active_tok": 293.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5152314814814815, "calib/avg_num_step_conf": 9.6328125, "calib/ece": 0.17846456692913387, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003268518518518518, "calib/mean_conf": 0.03594488188976378, "calib/mu_c": 0.03851851851851852, "calib/mu_w": 0.035250000000000004, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009055118110236221, "calib/std_conf": 0.03876657523814441, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28237416481069044, "calib/step_q_c_n": 449.0, "calib/step_q_gap": -0.01975587981003335, "calib/step_q_w": 0.3021300446207238, "calib/step_q_w_n": 2017.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2455.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 360.91015625, "completions/mean_terminated_length": 362.32550048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.09706666666666666, "grad_norm": 0.010622053407132626, "learning_rate": 3.0277777777777776e-06, "loss": 0.3275, "num_tokens": 18801421.0, "reward": 0.9989259243011475, "reward_std": 0.03695327788591385, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7947269678115845, "rewards/format_reward_step": 0.9921875, "step": 91 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8596785441040993, "aux_distill/mean_u": 0.2708570878329426, "aux_distill/n_active_tok": 268.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5676567656765676, "calib/avg_num_step_conf": 8.78125, "calib/ece": 0.17711462450592885, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006685109687439333, "calib/mean_conf": 0.02446640316205534, "calib/mu_c": 0.02980392156862745, "calib/mu_w": 0.02311881188118812, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.028049431551867763, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.35421364985163206, "calib/step_q_c_n": 337.0, "calib/step_q_gap": 0.0886029748123856, "calib/step_q_w": 0.26561067503924646, "calib/step_q_w_n": 1911.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1140.0, "completions/max_terminated_length": 1140.0, "completions/mean_length": 325.58984375, "completions/mean_terminated_length": 329.4505920410156, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.09813333333333334, "grad_norm": 0.011056625284254551, "learning_rate": 3e-06, "loss": 0.2696, "num_tokens": 18991492.0, "reward": 0.9935341477394104, "reward_std": 0.04476837068796158, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.7995683550834656, "rewards/format_reward_step": 0.98828125, "step": 92 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8805052265524864, "aux_distill/mean_u": 0.2796066804729568, "aux_distill/n_active_tok": 286.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5519877675840978, "calib/avg_num_step_conf": 9.578125, "calib/ece": 0.11880708661417326, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00013302752293577885, "calib/mean_conf": 0.031114173228346458, "calib/mu_c": 0.031000000000000003, "calib/mu_w": 0.031133027522935782, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004094488188976378, "calib/std_conf": 0.052249088790471354, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.269597510373444, "calib/step_q_c_n": 241.0, "calib/step_q_gap": -0.010523339920540609, "calib/step_q_w": 0.2801208502939846, "calib/step_q_w_n": 2211.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1061.0, "completions/max_terminated_length": 1061.0, "completions/mean_length": 349.859375, "completions/mean_terminated_length": 352.6141662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.0992, "grad_norm": 0.013927971012890339, "learning_rate": 2.9722222222222225e-06, "loss": 0.275, "num_tokens": 19186832.0, "reward": 0.9947122931480408, "reward_std": 0.030827995389699936, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8566120862960815, "rewards/format_reward_step": 0.9921875, "step": 93 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8493856117129326, "aux_distill/mean_u": 0.2487502605932423, "aux_distill/n_active_tok": 252.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5254245283018868, "calib/avg_num_step_conf": 8.86328125, "calib/ece": 0.1831620553359684, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009423584905660372, "calib/mean_conf": 0.026324110671936764, "calib/mu_c": 0.033773584905660375, "calib/mu_w": 0.024350000000000004, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03887150327041116, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25506896551724134, "calib/step_q_c_n": 406.0, "calib/step_q_gap": 0.00925254039646839, "calib/step_q_w": 0.24581642512077295, "calib/step_q_w_n": 1863.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1706.0, "completions/max_terminated_length": 1706.0, "completions/mean_length": 307.9765625, "completions/mean_terminated_length": 310.4015808105469, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.10026666666666667, "grad_norm": 0.01232621818780899, "learning_rate": 2.944444444444445e-06, "loss": 0.2311, "num_tokens": 19374354.0, "reward": 0.9922507405281067, "reward_std": 0.05277451127767563, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7891890406608582, "rewards/format_reward_step": 0.984375, "step": 94 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8817452136427164, "aux_distill/mean_u": 0.30189744452687745, "aux_distill/n_active_tok": 239.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5293927125506074, "calib/avg_num_step_conf": 7.79296875, "calib/ece": 0.23651372549019606, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0016769230769230793, "calib/mean_conf": 0.02442745098039216, "calib/mu_c": 0.025676923076923076, "calib/mu_w": 0.023999999999999997, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003019607843137255, "calib/std_conf": 0.03759653758792873, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3133485193621867, "calib/step_q_c_n": 439.0, "calib/step_q_gap": 0.05890436769123558, "calib/step_q_w": 0.25444415167095114, "calib/step_q_w_n": 1556.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1162.0, "completions/max_terminated_length": 1162.0, "completions/mean_length": 290.3671875, "completions/mean_terminated_length": 291.5058898925781, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.10133333333333333, "grad_norm": 0.013984967023134232, "learning_rate": 2.916666666666667e-06, "loss": 0.2771, "num_tokens": 19554816.0, "reward": 1.0016120672225952, "reward_std": 0.02413991466164589, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.75322425365448, "rewards/format_reward_step": 0.99609375, "step": 95 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8744475580751896, "aux_distill/mean_u": 0.3089908472147716, "aux_distill/n_active_tok": 243.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4525542701156896, "calib/avg_num_step_conf": 7.66015625, "calib/ece": 0.36756862745098035, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0075685688288054005, "calib/mean_conf": 0.02333333333333333, "calib/mu_c": 0.018673469387755104, "calib/mu_w": 0.026242038216560504, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0032941176470588232, "calib/std_conf": 0.04555826550205304, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2549625187406297, "calib/step_q_c_n": 667.0, "calib/step_q_gap": -0.0016547919239762177, "calib/step_q_w": 0.2566173106646059, "calib/step_q_w_n": 1294.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1618.0, "completions/max_terminated_length": 1618.0, "completions/mean_length": 290.7109375, "completions/mean_terminated_length": 291.8509826660156, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.1024, "grad_norm": 0.015029377304017544, "learning_rate": 2.888888888888889e-06, "loss": 0.2411, "num_tokens": 19735054.0, "reward": 0.9980310201644897, "reward_std": 0.027630947530269623, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.6210620999336243, "rewards/format_reward_step": 0.9921875, "step": 96 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9030767139047384, "aux_distill/mean_u": 0.30071125929448483, "aux_distill/n_active_tok": 209.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5594911937377691, "calib/avg_num_step_conf": 6.85546875, "calib/ece": 0.11738582677165352, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003496412263535552, "calib/mean_conf": 0.02584251968503937, "calib/mu_c": 0.02885714285714286, "calib/mu_w": 0.025360730593607307, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002716535433070867, "calib/std_conf": 0.03778230748651435, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23632038834951458, "calib/step_q_c_n": 206.0, "calib/step_q_gap": -0.023218991895805008, "calib/step_q_w": 0.2595393802453196, "calib/step_q_w_n": 1549.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1566.0, "completions/max_terminated_length": 1566.0, "completions/mean_length": 257.7578125, "completions/mean_terminated_length": 259.78741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.10346666666666667, "grad_norm": 0.013639519922435284, "learning_rate": 2.861111111111111e-06, "loss": 0.2865, "num_tokens": 19906112.0, "reward": 0.9950933456420898, "reward_std": 0.03068731725215912, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8612803816795349, "rewards/format_reward_step": 0.9921875, "step": 97 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9290945921093225, "aux_distill/mean_u": 0.3275131960751481, "aux_distill/n_active_tok": 197.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.562069774718398, "calib/avg_num_step_conf": 6.24609375, "calib/ece": 0.2457421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011461201501877342, "calib/mean_conf": 0.023789062500000003, "calib/mu_c": 0.032205882352941174, "calib/mu_w": 0.020744680851063832, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001953125, "calib/std_conf": 0.04323015302275825, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29350133333333334, "calib/step_q_c_n": 375.0, "calib/step_q_gap": 0.04622764052287581, "calib/step_q_w": 0.24727369281045752, "calib/step_q_w_n": 1224.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 725.0, "completions/max_terminated_length": 725.0, "completions/mean_length": 244.2578125, "completions/mean_terminated_length": 245.2156982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.10453333333333334, "grad_norm": 0.022586099803447723, "learning_rate": 2.8333333333333335e-06, "loss": 0.298, "num_tokens": 20074826.0, "reward": 1.003431797027588, "reward_std": 0.02640916034579277, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7451449632644653, "rewards/format_reward_step": 0.99609375, "step": 98 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8971052411943674, "aux_distill/mean_u": 0.2990826274790156, "aux_distill/n_active_tok": 205.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5872300376461264, "calib/avg_num_step_conf": 6.41015625, "calib/ece": 0.17105882352941176, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0056597978997424275, "calib/mean_conf": 0.025019607843137257, "calib/mu_c": 0.029591836734693882, "calib/mu_w": 0.023932038834951454, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00196078431372549, "calib/std_conf": 0.033616925123338894, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3081704035874439, "calib/step_q_c_n": 223.0, "calib/step_q_gap": 0.025302843643861372, "calib/step_q_w": 0.28286755994358254, "calib/step_q_w_n": 1418.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 964.0, "completions/max_terminated_length": 964.0, "completions/mean_length": 245.54296875, "completions/mean_terminated_length": 246.50588989257812, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.1056, "grad_norm": 0.015454606153070927, "learning_rate": 2.805555555555556e-06, "loss": 0.2625, "num_tokens": 20243485.0, "reward": 1.0008832216262817, "reward_std": 0.021229008212685585, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8142663836479187, "rewards/format_reward_step": 0.99609375, "step": 99 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9302643742412329, "aux_distill/mean_u": 0.34172670215847983, "aux_distill/n_active_tok": 188.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5127572898799313, "calib/avg_num_step_conf": 5.9296875, "calib/ece": 0.15816406249999998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.0029030874785591805, "calib/mean_conf": 0.026914062500000002, "calib/mu_c": 0.02931818181818182, "calib/mu_w": 0.02641509433962264, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0066015625, "calib/std_conf": 0.07529790079906673, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2816595744680851, "calib/step_q_c_n": 235.0, "calib/step_q_gap": 0.029535646175021968, "calib/step_q_w": 0.2521239282930631, "calib/step_q_w_n": 1283.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 829.0, "completions/max_terminated_length": 829.0, "completions/mean_length": 240.265625, "completions/mean_terminated_length": 241.20785522460938, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.10666666666666667, "grad_norm": 0.025083284825086594, "learning_rate": 2.7777777777777783e-06, "loss": 0.2703, "num_tokens": 20412401.0, "reward": 1.0018420219421387, "reward_std": 0.017068054527044296, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8318089842796326, "rewards/format_reward_step": 1.0, "step": 100 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9050206709653139, "aux_distill/mean_u": 0.29593671982755226, "aux_distill/n_active_tok": 216.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5083482409063804, "calib/avg_num_step_conf": 6.78515625, "calib/ece": 0.1396456692913386, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015348837209302295, "calib/mean_conf": 0.021299212598425196, "calib/mu_c": 0.020000000000000004, "calib/mu_w": 0.021534883720930233, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0037007874015748026, "calib/std_conf": 0.02894953930810734, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26361607142857146, "calib/step_q_c_n": 224.0, "calib/step_q_gap": -0.05943660537248602, "calib/step_q_w": 0.3230526768010575, "calib/step_q_w_n": 1513.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2364.0, "completions/max_terminated_length": 2364.0, "completions/mean_length": 281.3125, "completions/mean_terminated_length": 282.41571044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.10773333333333333, "grad_norm": 0.0935661792755127, "learning_rate": 2.7500000000000004e-06, "loss": 0.3368, "num_tokens": 20591409.0, "reward": 0.9945935010910034, "reward_std": 0.02838076651096344, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8446558117866516, "rewards/format_reward_step": 0.9921875, "step": 101 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8885838855057955, "aux_distill/mean_u": 0.3037922940254807, "aux_distill/n_active_tok": 198.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5216058085436346, "calib/avg_num_step_conf": 6.1875, "calib/ece": 0.29317647058823526, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.022142957845763426, "calib/mean_conf": 0.0283921568627451, "calib/mu_c": 0.04341463414634146, "calib/mu_w": 0.021271676300578034, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07588021479482467, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30633569739952715, "calib/step_q_c_n": 423.0, "calib/step_q_gap": 0.017407187494273058, "calib/step_q_w": 0.2889285099052541, "calib/step_q_w_n": 1161.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2648.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 238.81640625, "completions/mean_terminated_length": 238.81640625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.1088, "grad_norm": 0.07671157270669937, "learning_rate": 2.7222222222222224e-06, "loss": 0.2947, "num_tokens": 20759242.0, "reward": 1.0067307949066162, "reward_std": 0.03035442717373371, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.697055459022522, "rewards/format_reward_step": 0.99609375, "step": 102 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9347370229661465, "aux_distill/mean_u": 0.31828543972856727, "aux_distill/n_active_tok": 190.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5022981781714858, "calib/avg_num_step_conf": 5.96484375, "calib/ece": 0.2288235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001407320742102619, "calib/mean_conf": 0.026549019607843144, "calib/mu_c": 0.02548387096774194, "calib/mu_w": 0.02689119170984456, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00611764705882353, "calib/std_conf": 0.054863936412243276, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2631372549019608, "calib/step_q_c_n": 306.0, "calib/step_q_gap": -0.03180623404152816, "calib/step_q_w": 0.294943488943489, "calib/step_q_w_n": 1221.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2700.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 257.29296875, "completions/mean_terminated_length": 257.29296875, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.10986666666666667, "grad_norm": 0.03052721731364727, "learning_rate": 2.6944444444444444e-06, "loss": 0.3177, "num_tokens": 20929661.0, "reward": 1.000415325164795, "reward_std": 0.025061478838324547, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.762549638748169, "rewards/format_reward_step": 0.99609375, "step": 103 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8932774048298597, "aux_distill/mean_u": 0.2898657778103306, "aux_distill/n_active_tok": 189.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4949517867271696, "calib/avg_num_step_conf": 5.9375, "calib/ece": 0.1467109375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003585025524673848, "calib/mean_conf": 0.0181328125, "calib/mu_c": 0.015121951219512198, "calib/mu_w": 0.018706976744186046, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0023437500000000003, "calib/std_conf": 0.03397560501065204, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30661643835616437, "calib/step_q_c_n": 219.0, "calib/step_q_gap": 0.023472702768155163, "calib/step_q_w": 0.2831437355880092, "calib/step_q_w_n": 1301.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 814.0, "completions/max_terminated_length": 814.0, "completions/mean_length": 246.51171875, "completions/mean_terminated_length": 247.4784393310547, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.11093333333333333, "grad_norm": 0.03477464243769646, "learning_rate": 2.666666666666667e-06, "loss": 0.2265, "num_tokens": 21099448.0, "reward": 1.0016803741455078, "reward_std": 0.006197728216648102, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8432043790817261, "rewards/format_reward_step": 1.0, "step": 104 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8812897838652134, "aux_distill/mean_u": 0.27382185952685056, "aux_distill/n_active_tok": 196.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5686187518485655, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.180328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007855664004732289, "calib/mean_conf": 0.019125000000000003, "calib/mu_c": 0.01848979591836735, "calib/mu_w": 0.01927536231884058, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0040234375, "calib/std_conf": 0.03281696398206269, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2969296296296296, "calib/step_q_c_n": 270.0, "calib/step_q_gap": -0.0015341318382602886, "calib/step_q_w": 0.2984637614678899, "calib/step_q_w_n": 1308.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 746.0, "completions/max_terminated_length": 746.0, "completions/mean_length": 252.25, "completions/mean_terminated_length": 253.23922729492188, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.112, "grad_norm": 0.0723215714097023, "learning_rate": 2.6388888888888893e-06, "loss": 0.2398, "num_tokens": 21269784.0, "reward": 1.0028176307678223, "reward_std": 0.007789917290210724, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8142291307449341, "rewards/format_reward_step": 1.0, "step": 105 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9138540420681238, "aux_distill/mean_u": 0.3225658485289486, "aux_distill/n_active_tok": 176.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4124025974025974, "calib/avg_num_step_conf": 5.81640625, "calib/ece": 0.12572549019607845, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007142857142857145, "calib/mean_conf": 0.017019607843137254, "calib/mu_c": 0.010857142857142857, "calib/mu_w": 0.018000000000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002745098039215686, "calib/std_conf": 0.03509845546910577, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3506372549019608, "calib/step_q_c_n": 204.0, "calib/step_q_gap": 0.06313639887083239, "calib/step_q_w": 0.2875008560311284, "calib/step_q_w_n": 1285.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 858.0, "completions/max_terminated_length": 858.0, "completions/mean_length": 233.23828125, "completions/mean_terminated_length": 234.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.11306666666666666, "grad_norm": 0.07096699625253677, "learning_rate": 2.6111111111111113e-06, "loss": 0.2551, "num_tokens": 21434077.0, "reward": 0.996820330619812, "reward_std": 0.015735477209091187, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.8608281016349792, "rewards/format_reward_step": 0.99609375, "step": 106 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9093293808400631, "aux_distill/mean_u": 0.30919132201128474, "aux_distill/n_active_tok": 197.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4912209302325581, "calib/avg_num_step_conf": 6.3984375, "calib/ece": 0.14180392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.012360465116279072, "calib/mean_conf": 0.016078431372549017, "calib/mu_c": 0.026500000000000003, "calib/mu_w": 0.01413953488372093, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005098039215686275, "calib/std_conf": 0.04103936089460272, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3354362139917696, "calib/step_q_c_n": 243.0, "calib/step_q_gap": 0.06793012080180544, "calib/step_q_w": 0.26750609318996416, "calib/step_q_w_n": 1395.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1903.0, "completions/max_terminated_length": 1903.0, "completions/mean_length": 252.84765625, "completions/mean_terminated_length": 253.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.11413333333333334, "grad_norm": 0.02068367414176464, "learning_rate": 2.5833333333333337e-06, "loss": 0.3011, "num_tokens": 21603422.0, "reward": 0.9953607320785522, "reward_std": 0.030102472752332687, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8422839641571045, "rewards/format_reward_step": 0.9921875, "step": 107 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8714952226728201, "aux_distill/mean_u": 0.2846358781803798, "aux_distill/n_active_tok": 194.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4913869913869914, "calib/avg_num_step_conf": 6.109375, "calib/ece": 0.27914453125000005, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004782150282150286, "calib/mean_conf": 0.017183593750000004, "calib/mu_c": 0.013783783783783784, "calib/mu_w": 0.01856593406593407, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0036328125, "calib/std_conf": 0.034780021378443125, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29395225464190977, "calib/step_q_c_n": 377.0, "calib/step_q_gap": 0.020776180505431285, "calib/step_q_w": 0.2731760741364785, "calib/step_q_w_n": 1187.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 893.0, "completions/max_terminated_length": 893.0, "completions/mean_length": 248.33203125, "completions/mean_terminated_length": 249.30589294433594, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.1152, "grad_norm": 0.03424735739827156, "learning_rate": 2.5555555555555557e-06, "loss": 0.2994, "num_tokens": 21770227.0, "reward": 1.0032318830490112, "reward_std": 0.008335249498486519, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7174013257026672, "rewards/format_reward_step": 1.0, "step": 108 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8937185276299715, "aux_distill/mean_u": 0.27320062215980184, "aux_distill/n_active_tok": 207.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4570330167345093, "calib/avg_num_step_conf": 6.5546875, "calib/ece": 0.204938671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001197602894617818, "calib/mean_conf": 0.011467578124999999, "calib/mu_c": 0.010527272727272729, "calib/mu_w": 0.011724875621890547, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00078125, "calib/std_conf": 0.018586361021176834, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.264029304029304, "calib/step_q_c_n": 273.0, "calib/step_q_gap": 0.001200834278414331, "calib/step_q_w": 0.2628284697508897, "calib/step_q_w_n": 1405.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 882.0, "completions/max_terminated_length": 882.0, "completions/mean_length": 262.53125, "completions/mean_terminated_length": 263.560791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.11626666666666667, "grad_norm": 0.0347769632935524, "learning_rate": 2.5277777777777778e-06, "loss": 0.2498, "num_tokens": 21942035.0, "reward": 1.0020232200622559, "reward_std": 0.0043099867179989815, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.7892027497291565, "rewards/format_reward_step": 1.0, "step": 109 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9149785321205854, "aux_distill/mean_u": 0.2944213236797313, "aux_distill/n_active_tok": 189.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5183720930232557, "calib/avg_num_step_conf": 5.92578125, "calib/ece": 0.14976470588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006220930232558129, "calib/mean_conf": 0.00827450980392157, "calib/mu_c": 0.007750000000000002, "calib/mu_w": 0.008372093023255815, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000588235294117647, "calib/std_conf": 0.01403647689355264, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22549107142857144, "calib/step_q_c_n": 224.0, "calib/step_q_gap": -0.031654326869959115, "calib/step_q_w": 0.25714539829853056, "calib/step_q_w_n": 1293.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2691.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 269.8828125, "completions/mean_terminated_length": 269.8828125, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.11733333333333333, "grad_norm": 0.024901842698454857, "learning_rate": 2.5e-06, "loss": 0.3063, "num_tokens": 22116045.0, "reward": 0.9971724152565002, "reward_std": 0.013311240822076797, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.84200119972229, "rewards/format_reward_step": 0.99609375, "step": 110 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9114784337580204, "aux_distill/mean_u": 0.3028130204678725, "aux_distill/n_active_tok": 196.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.514842796092796, "calib/avg_num_step_conf": 6.34765625, "calib/ece": 0.27483070866141734, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008017399267399271, "calib/mean_conf": 0.008633858267716536, "calib/mu_c": 0.009208333333333334, "calib/mu_w": 0.008406593406593407, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011517694162225249, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2838389830508475, "calib/step_q_c_n": 354.0, "calib/step_q_gap": 0.03348414434117003, "calib/step_q_w": 0.25035483870967745, "calib/step_q_w_n": 1271.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2737.0, "completions/max_terminated_length": 2737.0, "completions/mean_length": 255.3125, "completions/mean_terminated_length": 256.3137512207031, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.1184, "grad_norm": 0.022217007353901863, "learning_rate": 2.4722222222222226e-06, "loss": 0.3231, "num_tokens": 22288813.0, "reward": 0.994674563407898, "reward_std": 0.026389483362436295, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7159115672111511, "rewards/format_reward_step": 0.9921875, "step": 111 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9168477077037096, "aux_distill/mean_u": 0.3094974264635611, "aux_distill/n_active_tok": 191.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5288595594386096, "calib/avg_num_step_conf": 6.0625, "calib/ece": 0.2000078125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006604703039315915, "calib/mean_conf": 0.0070234375, "calib/mu_c": 0.007547169811320755, "calib/mu_w": 0.006886699507389163, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010927649023627807, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2449549180327869, "calib/step_q_c_n": 244.0, "calib/step_q_gap": 0.023272196320248656, "calib/step_q_w": 0.22168272171253825, "calib/step_q_w_n": 1308.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 859.0, "completions/max_terminated_length": 859.0, "completions/mean_length": 260.26171875, "completions/mean_terminated_length": 261.2823791503906, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.11946666666666667, "grad_norm": 0.04051944240927696, "learning_rate": 2.4444444444444447e-06, "loss": 0.2796, "num_tokens": 22463360.0, "reward": 1.0014780759811401, "reward_std": 0.0030970366206020117, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7959250211715698, "rewards/format_reward_step": 1.0, "step": 112 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9045503884553909, "aux_distill/mean_u": 0.27518384129040346, "aux_distill/n_active_tok": 184.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5271070869662419, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.1560156862745098, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0032783366867873923, "calib/mean_conf": 0.008690196078431373, "calib/mu_c": 0.01142857142857143, "calib/mu_w": 0.008150234741784038, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014898154312609919, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23245714285714286, "calib/step_q_c_n": 210.0, "calib/step_q_gap": -0.044223456511942316, "calib/step_q_w": 0.2766805993690852, "calib/step_q_w_n": 1268.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1069.0, "completions/max_terminated_length": 1069.0, "completions/mean_length": 238.265625, "completions/mean_terminated_length": 239.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.12053333333333334, "grad_norm": 0.049823589622974396, "learning_rate": 2.4166666666666667e-06, "loss": 0.274, "num_tokens": 22629556.0, "reward": 0.9978206157684326, "reward_std": 0.015029273927211761, "rewards/accuracy_reward_step": 0.1640625, "rewards/final_brier_reward_step": 0.8354849815368652, "rewards/format_reward_step": 0.99609375, "step": 113 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8853135239332914, "aux_distill/mean_u": 0.2820834696545772, "aux_distill/n_active_tok": 185.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5163684811572136, "calib/avg_num_step_conf": 5.93359375, "calib/ece": 0.2709765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0013239436619718308, "calib/mean_conf": 0.0063671875, "calib/mu_c": 0.007323943661971831, "calib/mu_w": 0.006, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009168876612750535, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.254423076923077, "calib/step_q_c_n": 364.0, "calib/step_q_gap": -0.014064368964368856, "calib/step_q_w": 0.26848744588744583, "calib/step_q_w_n": 1155.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1002.0, "completions/max_terminated_length": 1002.0, "completions/mean_length": 248.54296875, "completions/mean_terminated_length": 249.5176544189453, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.1216, "grad_norm": 0.024916112422943115, "learning_rate": 2.388888888888889e-06, "loss": 0.2992, "num_tokens": 22798207.0, "reward": 1.0019688606262207, "reward_std": 0.004232214763760567, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7265941500663757, "rewards/format_reward_step": 1.0, "step": 114 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8834676668047905, "aux_distill/mean_u": 0.2663964808695613, "aux_distill/n_active_tok": 208.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5605055747789311, "calib/avg_num_step_conf": 6.52734375, "calib/ece": 0.19196078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0031862745098039224, "calib/mean_conf": 0.00803921568627451, "calib/mu_c": 0.010588235294117648, "calib/mu_w": 0.007401960784313726, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014527810766229086, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.352641975308642, "calib/step_q_c_n": 324.0, "calib/step_q_gap": 0.05986988919134434, "calib/step_q_w": 0.29277208611729766, "calib/step_q_w_n": 1347.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1174.0, "completions/max_terminated_length": 1174.0, "completions/mean_length": 273.64453125, "completions/mean_terminated_length": 274.7176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.12266666666666666, "grad_norm": 0.019408583641052246, "learning_rate": 2.361111111111111e-06, "loss": 0.2465, "num_tokens": 22973524.0, "reward": 0.9980658292770386, "reward_std": 0.015270305797457695, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.8008191585540771, "rewards/format_reward_step": 0.99609375, "step": 115 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8767251344397664, "aux_distill/mean_u": 0.2608851315583497, "aux_distill/n_active_tok": 212.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5329034391534392, "calib/avg_num_step_conf": 6.64453125, "calib/ece": 0.24080000000000001, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00045734126984127, "calib/mean_conf": 0.0062588235294117655, "calib/mu_c": 0.006603174603174604, "calib/mu_w": 0.006145833333333334, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010048298587237041, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27004804804804805, "calib/step_q_c_n": 333.0, "calib/step_q_gap": 0.026137229334597778, "calib/step_q_w": 0.24391081871345027, "calib/step_q_w_n": 1368.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2592.0, "completions/max_terminated_length": 2592.0, "completions/mean_length": 286.75, "completions/mean_terminated_length": 286.75, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.12373333333333333, "grad_norm": 0.03141540288925171, "learning_rate": 2.3333333333333336e-06, "loss": 0.2802, "num_tokens": 23151452.0, "reward": 0.9976489543914795, "reward_std": 0.014305486343801022, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.753110408782959, "rewards/format_reward_step": 0.99609375, "step": 116 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8591549377888441, "aux_distill/mean_u": 0.24090663719646957, "aux_distill/n_active_tok": 207.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.551553449183781, "calib/avg_num_step_conf": 6.515625, "calib/ece": 0.16746093749999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013901000526592946, "calib/mean_conf": 0.0083203125, "calib/mu_c": 0.01977777777777778, "calib/mu_w": 0.005876777251184834, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.03791323020928635, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30927450980392157, "calib/step_q_c_n": 306.0, "calib/step_q_gap": 0.040909752094670515, "calib/step_q_w": 0.26836475770925106, "calib/step_q_w_n": 1362.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 925.0, "completions/max_terminated_length": 925.0, "completions/mean_length": 280.37890625, "completions/mean_terminated_length": 281.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.1248, "grad_norm": 0.012358613312244415, "learning_rate": 2.305555555555556e-06, "loss": 0.2917, "num_tokens": 23329829.0, "reward": 1.002723217010498, "reward_std": 0.006827778648585081, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8296651840209961, "rewards/format_reward_step": 1.0, "step": 117 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8510748287662864, "aux_distill/mean_u": 0.23291316663587935, "aux_distill/n_active_tok": 196.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5326105886450714, "calib/avg_num_step_conf": 6.125, "calib/ece": 0.22282421875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0020231626610936956, "calib/mean_conf": 0.008582031249999999, "calib/mu_c": 0.007017241379310346, "calib/mu_w": 0.009040404040404041, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002421875, "calib/std_conf": 0.03934672979898104, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22057758620689655, "calib/step_q_c_n": 348.0, "calib/step_q_gap": -0.04127733182589033, "calib/step_q_w": 0.2618549180327869, "calib/step_q_w_n": 1220.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1122.0, "completions/max_terminated_length": 1122.0, "completions/mean_length": 262.4453125, "completions/mean_terminated_length": 263.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.12586666666666665, "grad_norm": 0.012375210411846638, "learning_rate": 2.277777777777778e-06, "loss": 0.2421, "num_tokens": 23501023.0, "reward": 1.0007789134979248, "reward_std": 0.005302540026605129, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.7749953269958496, "rewards/format_reward_step": 1.0, "step": 118 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9049360640347004, "aux_distill/mean_u": 0.28474286811428084, "aux_distill/n_active_tok": 187.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4892958892958894, "calib/avg_num_step_conf": 5.8671875, "calib/ece": 0.2501653543307087, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0032437932437932426, "calib/mean_conf": 0.00825984251968504, "calib/mu_c": 0.005846153846153846, "calib/mu_w": 0.009089947089947089, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0012598425196850393, "calib/std_conf": 0.022346664779041493, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.25038108882521487, "calib/step_q_c_n": 349.0, "calib/step_q_gap": -0.061822727306615144, "calib/step_q_w": 0.31220381613183, "calib/step_q_w_n": 1153.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 949.0, "completions/max_terminated_length": 949.0, "completions/mean_length": 262.94921875, "completions/mean_terminated_length": 263.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.12693333333333334, "grad_norm": 0.01229011919349432, "learning_rate": 2.25e-06, "loss": 0.2588, "num_tokens": 23673402.0, "reward": 0.9933902621269226, "reward_std": 0.025129135698080063, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.74068683385849, "rewards/format_reward_step": 0.9921875, "step": 119 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8911927230656147, "aux_distill/mean_u": 0.27544143435489915, "aux_distill/n_active_tok": 195.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4726834724253635, "calib/avg_num_step_conf": 6.1171875, "calib/ece": 0.22398828125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0013732255011614887, "calib/mean_conf": 0.006480468749999999, "calib/mu_c": 0.005423728813559323, "calib/mu_w": 0.006796954314720812, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008836603195248354, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2671947194719472, "calib/step_q_c_n": 303.0, "calib/step_q_gap": 0.0029127717284792576, "calib/step_q_w": 0.2642819477434679, "calib/step_q_w_n": 1263.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1046.0, "completions/max_terminated_length": 1046.0, "completions/mean_length": 260.39453125, "completions/mean_terminated_length": 261.41571044921875, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.128, "grad_norm": 0.01112359669059515, "learning_rate": 2.222222222222222e-06, "loss": 0.2705, "num_tokens": 23846751.0, "reward": 0.9972838759422302, "reward_std": 0.013594256713986397, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7680052518844604, "rewards/format_reward_step": 0.99609375, "step": 120 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9054578468203545, "aux_distill/mean_u": 0.27849697061773127, "aux_distill/n_active_tok": 196.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.523109243697479, "calib/avg_num_step_conf": 6.1328125, "calib/ece": 0.18948616600790513, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020778311324529813, "calib/mean_conf": 0.005059288537549407, "calib/mu_c": 0.0067346938775510205, "calib/mu_w": 0.004656862745098039, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00043478260869565214, "calib/std_conf": 0.010122590607178607, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.1908032128514056, "calib/step_q_c_n": 249.0, "calib/step_q_gap": -0.04857680228863981, "calib/step_q_w": 0.23938001514004542, "calib/step_q_w_n": 1321.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 278.1640625, "completions/mean_terminated_length": 279.2549133300781, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.12906666666666666, "grad_norm": 0.01199932862073183, "learning_rate": 2.1944444444444445e-06, "loss": 0.2978, "num_tokens": 24023017.0, "reward": 0.9895070195198059, "reward_std": 0.03511889651417732, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.7993265390396118, "rewards/format_reward_step": 0.98828125, "step": 121 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9337432067841291, "aux_distill/mean_u": 0.34765335554757254, "aux_distill/n_active_tok": 192.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.46111111111111114, "calib/avg_num_step_conf": 6.015625, "calib/ece": 0.17176470588235296, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001126984126984127, "calib/mean_conf": 0.004705882352941177, "calib/mu_c": 0.003777777777777778, "calib/mu_w": 0.004904761904761905, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007071883343592063, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32420078740157476, "calib/step_q_c_n": 254.0, "calib/step_q_gap": 0.05633997869239904, "calib/step_q_w": 0.2678608087091757, "calib/step_q_w_n": 1286.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1603.0, "completions/max_terminated_length": 1603.0, "completions/mean_length": 260.8125, "completions/mean_terminated_length": 261.8352966308594, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.13013333333333332, "grad_norm": 0.013110945001244545, "learning_rate": 2.166666666666667e-06, "loss": 0.2786, "num_tokens": 24197129.0, "reward": 0.9967218637466431, "reward_std": 0.012659726664423943, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.8215687274932861, "rewards/format_reward_step": 0.99609375, "step": 122 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9081991314888, "aux_distill/mean_u": 0.3049377110123672, "aux_distill/n_active_tok": 191.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46956362302896953, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.20609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00218921892189219, "calib/mean_conf": 0.006171875, "calib/mu_c": 0.0044444444444444444, "calib/mu_w": 0.006633663366336634, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0006640625, "calib/std_conf": 0.013841801146685175, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.250663082437276, "calib/step_q_c_n": 279.0, "calib/step_q_gap": -0.02002242711686414, "calib/step_q_w": 0.2706855095541401, "calib/step_q_w_n": 1256.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 824.0, "completions/max_terminated_length": 824.0, "completions/mean_length": 273.9453125, "completions/mean_terminated_length": 275.0196228027344, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.1312, "grad_norm": 0.012809602543711662, "learning_rate": 2.138888888888889e-06, "loss": 0.2877, "num_tokens": 24372547.0, "reward": 1.0008225440979004, "reward_std": 0.002213716506958008, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7907078266143799, "rewards/format_reward_step": 1.0, "step": 123 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9068530723452568, "aux_distill/mean_u": 0.24844131723727766, "aux_distill/n_active_tok": 165.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5376205520452277, "calib/avg_num_step_conf": 5.203125, "calib/ece": 0.23624609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0010989358164283343, "calib/mean_conf": 0.00594140625, "calib/mu_c": 0.006774193548387097, "calib/mu_w": 0.005675257731958763, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007696417869532613, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3619039039039039, "calib/step_q_c_n": 333.0, "calib/step_q_gap": 0.111948948948949, "calib/step_q_w": 0.24995495495495493, "calib/step_q_w_n": 999.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 611.0, "completions/max_terminated_length": 611.0, "completions/mean_length": 237.73046875, "completions/mean_terminated_length": 238.6627655029297, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.13226666666666667, "grad_norm": 0.011809157207608223, "learning_rate": 2.1111111111111114e-06, "loss": 0.2786, "num_tokens": 24540222.0, "reward": 1.0015933513641357, "reward_std": 0.0029463740065693855, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7609992027282715, "rewards/format_reward_step": 1.0, "step": 124 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8929617684334517, "aux_distill/mean_u": 0.2558508365630142, "aux_distill/n_active_tok": 174.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48128957973517555, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.24052421875000002, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0013656879677605071, "calib/mean_conf": 0.006585156250000001, "calib/mu_c": 0.005555555555555556, "calib/mu_w": 0.006921243523316063, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005078125, "calib/std_conf": 0.012401295231268625, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32620000000000005, "calib/step_q_c_n": 300.0, "calib/step_q_gap": 0.02711415525114158, "calib/step_q_w": 0.29908584474885846, "calib/step_q_w_n": 1095.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 905.0, "completions/max_terminated_length": 905.0, "completions/mean_length": 250.09765625, "completions/mean_terminated_length": 251.0784454345703, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.13333333333333333, "grad_norm": 0.012777113355696201, "learning_rate": 2.0833333333333334e-06, "loss": 0.2809, "num_tokens": 24709055.0, "reward": 1.0012686252593994, "reward_std": 0.0029744510538876057, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7564435005187988, "rewards/format_reward_step": 1.0, "step": 125 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8970666322857141, "aux_distill/mean_u": 0.2859182237069479, "aux_distill/n_active_tok": 183.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5622351206928322, "calib/avg_num_step_conf": 5.71875, "calib/ece": 0.2055921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00180154781647319, "calib/mean_conf": 0.006172549019607843, "calib/mu_c": 0.0075925925925925935, "calib/mu_w": 0.0057910447761194035, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008019683589459628, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3376099585062241, "calib/step_q_c_n": 241.0, "calib/step_q_gap": 0.06509564943018159, "calib/step_q_w": 0.2725143090760425, "calib/step_q_w_n": 1223.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 678.0, "completions/max_terminated_length": 678.0, "completions/mean_length": 254.7890625, "completions/mean_terminated_length": 255.7882537841797, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.1344, "grad_norm": 0.010087775066494942, "learning_rate": 2.0555555555555555e-06, "loss": 0.281, "num_tokens": 24879745.0, "reward": 0.993738055229187, "reward_std": 0.02483932301402092, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.784351110458374, "rewards/format_reward_step": 0.9921875, "step": 126 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8760164938867092, "aux_distill/mean_u": 0.25388843472722616, "aux_distill/n_active_tok": 181.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5213604693802714, "calib/avg_num_step_conf": 5.7421875, "calib/ece": 0.20528125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008111477814448107, "calib/mean_conf": 0.00565625, "calib/mu_c": 0.006296296296296296, "calib/mu_w": 0.005485148514851486, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008934166773544134, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2709774436090226, "calib/step_q_c_n": 266.0, "calib/step_q_gap": 0.003449370519321593, "calib/step_q_w": 0.267528073089701, "calib/step_q_w_n": 1204.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1207.0, "completions/max_terminated_length": 1207.0, "completions/mean_length": 248.9765625, "completions/mean_terminated_length": 249.9529571533203, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.13546666666666668, "grad_norm": 0.010908103547990322, "learning_rate": 2.027777777777778e-06, "loss": 0.2468, "num_tokens": 25047155.0, "reward": 0.9973659515380859, "reward_std": 0.013409133069217205, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7877006530761719, "rewards/format_reward_step": 0.99609375, "step": 127 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9018064886331558, "aux_distill/mean_u": 0.31848810168209557, "aux_distill/n_active_tok": 188.875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.518262987012987, "calib/avg_num_step_conf": 6.19140625, "calib/ece": 0.21394566929133857, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018440836940836923, "calib/mean_conf": 0.006526771653543308, "calib/mu_c": 0.007964285714285714, "calib/mu_w": 0.006120202020202021, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010045008544764575, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22672142857142857, "calib/step_q_c_n": 280.0, "calib/step_q_gap": -0.05722033388067871, "calib/step_q_w": 0.2839417624521073, "calib/step_q_w_n": 1305.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2661.0, "completions/max_terminated_length": 2661.0, "completions/mean_length": 287.5, "completions/mean_terminated_length": 288.6274719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.13653333333333334, "grad_norm": 0.010613925755023956, "learning_rate": 2.0000000000000003e-06, "loss": 0.3066, "num_tokens": 25227419.0, "reward": 0.9938584566116333, "reward_std": 0.025512343272566795, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.7767794728279114, "rewards/format_reward_step": 0.9921875, "step": 128 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8973717913031578, "aux_distill/mean_u": 0.28847371241550984, "aux_distill/n_active_tok": 185.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5291068580542264, "calib/avg_num_step_conf": 5.80859375, "calib/ece": 0.2196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001554492291334397, "calib/mean_conf": 0.0084, "calib/mu_c": 0.007192982456140352, "calib/mu_w": 0.008747474747474749, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002235294117647059, "calib/std_conf": 0.027026290758386782, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.31616983050847464, "calib/step_q_c_n": 295.0, "calib/step_q_gap": -0.009838558753270332, "calib/step_q_w": 0.32600838926174497, "calib/step_q_w_n": 1192.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1148.0, "completions/max_terminated_length": 1148.0, "completions/mean_length": 250.01953125, "completions/mean_terminated_length": 251.00001525878906, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.1376, "grad_norm": 0.011224478483200073, "learning_rate": 1.9722222222222224e-06, "loss": 0.2615, "num_tokens": 25393808.0, "reward": 0.9972963333129883, "reward_std": 0.014956073835492134, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7758427858352661, "rewards/format_reward_step": 0.99609375, "step": 129 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8825326859951019, "aux_distill/mean_u": 0.2890509668456145, "aux_distill/n_active_tok": 200.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4762126077404091, "calib/avg_num_step_conf": 6.25, "calib/ece": 0.2328235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00021379077235085184, "calib/mean_conf": 0.006392156862745099, "calib/mu_c": 0.006229508196721313, "calib/mu_w": 0.006443298969072165, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009220774565254115, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2656804733727811, "calib/step_q_c_n": 338.0, "calib/step_q_gap": -0.05083117480471494, "calib/step_q_w": 0.31651164817749605, "calib/step_q_w_n": 1262.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2230.0, "completions/max_terminated_length": 2230.0, "completions/mean_length": 263.3515625, "completions/mean_terminated_length": 263.3515625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.13866666666666666, "grad_norm": 0.010412861593067646, "learning_rate": 1.944444444444445e-06, "loss": 0.3018, "num_tokens": 25566514.0, "reward": 0.9975153803825378, "reward_std": 0.014174779877066612, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7606558203697205, "rewards/format_reward_step": 0.99609375, "step": 130 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8887091083452106, "aux_distill/mean_u": 0.24683482745846644, "aux_distill/n_active_tok": 180.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48694612024131473, "calib/avg_num_step_conf": 5.984375, "calib/ece": 0.17593411764705885, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00349727480757229, "calib/mean_conf": 0.008301176470588237, "calib/mu_c": 0.005434782608695652, "calib/mu_w": 0.008932057416267942, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0019215686274509803, "calib/std_conf": 0.03141364195022127, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3232857142857143, "calib/step_q_c_n": 210.0, "calib/step_q_gap": 0.020082764210071324, "calib/step_q_w": 0.30320295007564296, "calib/step_q_w_n": 1322.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 932.0, "completions/max_terminated_length": 932.0, "completions/mean_length": 244.96484375, "completions/mean_terminated_length": 245.92550659179688, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.13973333333333332, "grad_norm": 0.01468158233910799, "learning_rate": 1.916666666666667e-06, "loss": 0.2812, "num_tokens": 25735433.0, "reward": 0.9965444803237915, "reward_std": 0.014411761425435543, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.817307710647583, "rewards/format_reward_step": 0.99609375, "step": 131 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8533799685537815, "aux_distill/mean_u": 0.3045404417038223, "aux_distill/n_active_tok": 186.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4730164158686731, "calib/avg_num_step_conf": 5.8359375, "calib/ece": 0.32980234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001534309165526676, "calib/mean_conf": 0.00613515625, "calib/mu_c": 0.005116279069767442, "calib/mu_w": 0.006650588235294118, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010051524214669432, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.32058595641646487, "calib/step_q_c_n": 413.0, "calib/step_q_gap": 0.0213463634469922, "calib/step_q_w": 0.29923959296947267, "calib/step_q_w_n": 1081.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 680.0, "completions/max_terminated_length": 680.0, "completions/mean_length": 254.4765625, "completions/mean_terminated_length": 255.47451782226562, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.1408, "grad_norm": 0.01189375203102827, "learning_rate": 1.888888888888889e-06, "loss": 0.2757, "num_tokens": 25906171.0, "reward": 1.0016493797302246, "reward_std": 0.0029917373321950436, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.667361319065094, "rewards/format_reward_step": 1.0, "step": 132 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9298758413642645, "aux_distill/mean_u": 0.31848106660529757, "aux_distill/n_active_tok": 199.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49122422114962705, "calib/avg_num_step_conf": 6.2265625, "calib/ece": 0.1622392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0025775559455901726, "calib/mean_conf": 0.00795686274509804, "calib/mu_c": 0.005813953488372093, "calib/mu_w": 0.008391509433962266, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0007843137254901962, "calib/std_conf": 0.015985351810660065, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.35344493392070486, "calib/step_q_c_n": 227.0, "calib/step_q_gap": 0.05997748695654975, "calib/step_q_w": 0.2934674469641551, "calib/step_q_w_n": 1367.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 289.73046875, "completions/mean_terminated_length": 289.73046875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "epoch": 0.14186666666666667, "grad_norm": 0.01119232177734375, "learning_rate": 1.8611111111111113e-06, "loss": 0.3108, "num_tokens": 26086686.0, "reward": 0.9969115257263184, "reward_std": 0.013512922450900078, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.8297605514526367, "rewards/format_reward_step": 0.99609375, "step": 133 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9139052350074053, "aux_distill/mean_u": 0.29659186276340005, "aux_distill/n_active_tok": 192.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.45802995631370913, "calib/avg_num_step_conf": 6.12890625, "calib/ece": 0.17641755366098244, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004203463236600387, "calib/mean_conf": 0.00844519143705679, "calib/mu_c": 0.005, "calib/mu_w": 0.009203463236600387, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0022352941176470584, "calib/std_conf": 0.03632257550847606, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24870546875, "calib/step_q_c_n": 256.0, "calib/step_q_gap": -0.05996551373286363, "calib/step_q_w": 0.3086709824828636, "calib/step_q_w_n": 1313.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 859.0, "completions/max_terminated_length": 859.0, "completions/mean_length": 277.6015625, "completions/mean_terminated_length": 278.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.14293333333333333, "grad_norm": 0.01119636744260788, "learning_rate": 1.8333333333333333e-06, "loss": 0.2704, "num_tokens": 26266704.0, "reward": 0.9962995648384094, "reward_std": 0.014586620032787323, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8168178796768188, "rewards/format_reward_step": 0.99609375, "step": 134 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9191968068480492, "aux_distill/mean_u": 0.2777864922801156, "aux_distill/n_active_tok": 197.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4424755355725998, "calib/avg_num_step_conf": 6.20703125, "calib/ece": 0.220390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.0060897469805166185, "calib/mean_conf": 0.009453125, "calib/mu_c": 0.004719298245614035, "calib/mu_w": 0.010809045226130654, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00359375, "calib/std_conf": 0.05792726573241979, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2797987804878049, "calib/step_q_c_n": 328.0, "calib/step_q_gap": -0.027209149726310833, "calib/step_q_w": 0.3070079302141157, "calib/step_q_w_n": 1261.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1090.0, "completions/max_terminated_length": 1090.0, "completions/mean_length": 285.00390625, "completions/mean_terminated_length": 286.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.144, "grad_norm": 0.011457856744527817, "learning_rate": 1.8055555555555557e-06, "loss": 0.2915, "num_tokens": 26445545.0, "reward": 0.9993282556533813, "reward_std": 0.007239358965307474, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.7760003805160522, "rewards/format_reward_step": 1.0, "step": 135 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8874705582857132, "aux_distill/mean_u": 0.2744993750846104, "aux_distill/n_active_tok": 193.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.57156432748538, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.29486328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.0036798245614035096, "calib/mean_conf": 0.009824218750000002, "calib/mu_c": 0.007236842105263159, "calib/mu_w": 0.010916666666666668, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00390625, "calib/std_conf": 0.06271242306913478, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2921197916666667, "calib/step_q_c_n": 384.0, "calib/step_q_gap": 0.0010414937943262759, "calib/step_q_w": 0.2910782978723404, "calib/step_q_w_n": 1175.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 831.0, "completions/max_terminated_length": 831.0, "completions/mean_length": 266.30078125, "completions/mean_terminated_length": 267.3451232910156, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.14506666666666668, "grad_norm": 0.011349204927682877, "learning_rate": 1.777777777777778e-06, "loss": 0.2561, "num_tokens": 26622206.0, "reward": 1.000133752822876, "reward_std": 0.008982696570456028, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.703392505645752, "rewards/format_reward_step": 1.0, "step": 136 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8992571644484997, "aux_distill/mean_u": 0.2754937155461978, "aux_distill/n_active_tok": 185.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.51244918699187, "calib/avg_num_step_conf": 5.8125, "calib/ece": 0.18371541501976282, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00029878048780487745, "calib/mean_conf": 0.006007905138339921, "calib/mu_c": 0.0062499999999999995, "calib/mu_w": 0.005951219512195122, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007186940337990243, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3092271062271062, "calib/step_q_c_n": 273.0, "calib/step_q_gap": 0.046130809930809924, "calib/step_q_w": 0.2630962962962963, "calib/step_q_w_n": 1215.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1103.0, "completions/max_terminated_length": 1103.0, "completions/mean_length": 262.55078125, "completions/mean_terminated_length": 265.6640319824219, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.14613333333333334, "grad_norm": 0.0104745551943779, "learning_rate": 1.75e-06, "loss": 0.2451, "num_tokens": 26796403.0, "reward": 0.9894097447395325, "reward_std": 0.027691062539815903, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8030382990837097, "rewards/format_reward_step": 0.98828125, "step": 137 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8681779894977808, "aux_distill/mean_u": 0.2406305521502327, "aux_distill/n_active_tok": 180.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4716071428571429, "calib/avg_num_step_conf": 5.671875, "calib/ece": 0.21349609375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005535714285714293, "calib/mean_conf": 0.00525390625, "calib/mu_c": 0.004821428571428572, "calib/mu_w": 0.005375000000000001, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007384129966774077, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2772347266881029, "calib/step_q_c_n": 311.0, "calib/step_q_gap": -0.034996736940293205, "calib/step_q_w": 0.3122314636283961, "calib/step_q_w_n": 1141.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 754.0, "completions/max_terminated_length": 754.0, "completions/mean_length": 244.9453125, "completions/mean_terminated_length": 245.90589904785156, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.1472, "grad_norm": 0.011168470606207848, "learning_rate": 1.7222222222222224e-06, "loss": 0.2743, "num_tokens": 26963445.0, "reward": 1.0010135173797607, "reward_std": 0.0024566391948610544, "rewards/accuracy_reward_step": 0.21875, "rewards/final_brier_reward_step": 0.7832772731781006, "rewards/format_reward_step": 1.0, "step": 138 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.865342266857624, "aux_distill/mean_u": 0.27075061252326454, "aux_distill/n_active_tok": 177.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5913461538461537, "calib/avg_num_step_conf": 5.56640625, "calib/ece": 0.19644921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0024822775263951746, "calib/mean_conf": 0.00667578125, "calib/mu_c": 0.008653846153846154, "calib/mu_w": 0.00617156862745098, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007693364085505666, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2794509803921568, "calib/step_q_c_n": 255.0, "calib/step_q_gap": -0.01616978883861242, "calib/step_q_w": 0.29562076923076924, "calib/step_q_w_n": 1170.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 752.0, "completions/max_terminated_length": 752.0, "completions/mean_length": 251.046875, "completions/mean_terminated_length": 252.03138732910156, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.14826666666666666, "grad_norm": 0.014333439990878105, "learning_rate": 1.6944444444444446e-06, "loss": 0.2661, "num_tokens": 27130809.0, "reward": 1.0017058849334717, "reward_std": 0.0031183804385364056, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.8002868294715881, "rewards/format_reward_step": 1.0, "step": 139 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8883232790976763, "aux_distill/mean_u": 0.24898915879042913, "aux_distill/n_active_tok": 178.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5332845052083333, "calib/avg_num_step_conf": 5.62109375, "calib/ece": 0.2438671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009895833333333336, "calib/mean_conf": 0.006132812499999999, "calib/mu_c": 0.006875000000000001, "calib/mu_w": 0.005885416666666667, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007145662029500398, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2609285714285714, "calib/step_q_c_n": 294.0, "calib/step_q_gap": -0.03759893948845916, "calib/step_q_w": 0.29852751091703056, "calib/step_q_w_n": 1145.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 794.0, "completions/max_terminated_length": 794.0, "completions/mean_length": 249.2421875, "completions/mean_terminated_length": 250.21961975097656, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.14933333333333335, "grad_norm": 0.010594126768410206, "learning_rate": 1.6666666666666667e-06, "loss": 0.2774, "num_tokens": 27299631.0, "reward": 1.0016744136810303, "reward_std": 0.0027817150112241507, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7533488273620605, "rewards/format_reward_step": 1.0, "step": 140 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.868485214188695, "aux_distill/mean_u": 0.2833877031353884, "aux_distill/n_active_tok": 171.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5427070197562001, "calib/avg_num_step_conf": 5.48828125, "calib/ece": 0.2321875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008238755779739378, "calib/mean_conf": 0.006093749999999999, "calib/mu_c": 0.006721311475409836, "calib/mu_w": 0.0058974358974358985, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007366984521328926, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26901041666666664, "calib/step_q_c_n": 288.0, "calib/step_q_gap": 0.005483290435690791, "calib/step_q_w": 0.26352712623097585, "calib/step_q_w_n": 1117.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1212.0, "completions/max_terminated_length": 1212.0, "completions/mean_length": 269.90625, "completions/mean_terminated_length": 270.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.1504, "grad_norm": 0.011970674619078636, "learning_rate": 1.638888888888889e-06, "loss": 0.2512, "num_tokens": 27475823.0, "reward": 1.0015559196472168, "reward_std": 0.0027788393199443817, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.764830470085144, "rewards/format_reward_step": 1.0, "step": 141 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8942771572619677, "aux_distill/mean_u": 0.28075573797004366, "aux_distill/n_active_tok": 180.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.510369532428356, "calib/avg_num_step_conf": 5.7109375, "calib/ece": 0.1976796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004064856711915544, "calib/mean_conf": 0.0054453125000000005, "calib/mu_c": 0.00576923076923077, "calib/mu_w": 0.005362745098039216, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006972948391989127, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30242448979591835, "calib/step_q_c_n": 245.0, "calib/step_q_gap": 0.044435171800848494, "calib/step_q_w": 0.25798931799506986, "calib/step_q_w_n": 1217.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 991.0, "completions/max_terminated_length": 991.0, "completions/mean_length": 260.453125, "completions/mean_terminated_length": 261.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 56.0, "epoch": 0.15146666666666667, "grad_norm": 0.010074146091938019, "learning_rate": 1.6111111111111113e-06, "loss": 0.2714, "num_tokens": 27647659.0, "reward": 1.0011327266693115, "reward_std": 0.002334078773856163, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.799140453338623, "rewards/format_reward_step": 1.0, "step": 142 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8822210561484098, "aux_distill/mean_u": 0.2640907637767747, "aux_distill/n_active_tok": 184.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5635157855408182, "calib/avg_num_step_conf": 5.76953125, "calib/ece": 0.20162509803921566, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001717093218755841, "calib/mean_conf": 0.00739450980392157, "calib/mu_c": 0.008754716981132078, "calib/mu_w": 0.007037623762376237, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000588235294117647, "calib/std_conf": 0.01207094588520839, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2553433333333333, "calib/step_q_c_n": 300.0, "calib/step_q_gap": 0.014744352874539762, "calib/step_q_w": 0.24059898045879355, "calib/step_q_w_n": 1177.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 270.42578125, "completions/mean_terminated_length": 270.42578125, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.15253333333333333, "grad_norm": 0.011427761986851692, "learning_rate": 1.5833333333333333e-06, "loss": 0.3089, "num_tokens": 27824224.0, "reward": 0.9939003586769104, "reward_std": 0.025681499391794205, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7885819673538208, "rewards/format_reward_step": 0.9921875, "step": 143 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8531425483524799, "aux_distill/mean_u": 0.2394903155267863, "aux_distill/n_active_tok": 172.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5109019886363637, "calib/avg_num_step_conf": 5.46484375, "calib/ece": 0.309479296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004416477272727273, "calib/mean_conf": 0.009348828125, "calib/mu_c": 0.0063125, "calib/mu_w": 0.010728977272727274, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0031640625, "calib/std_conf": 0.03690465648805999, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2530695443645084, "calib/step_q_c_n": 417.0, "calib/step_q_gap": -0.025420272336102556, "calib/step_q_w": 0.27848981670061096, "calib/step_q_w_n": 982.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 949.0, "completions/max_terminated_length": 949.0, "completions/mean_length": 237.46484375, "completions/mean_terminated_length": 238.39608764648438, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.1536, "grad_norm": 0.014708510600030422, "learning_rate": 1.5555555555555558e-06, "loss": 0.2486, "num_tokens": 27989143.0, "reward": 1.0012478828430176, "reward_std": 0.004646037705242634, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6899959444999695, "rewards/format_reward_step": 1.0, "step": 144 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8793023275211453, "aux_distill/mean_u": 0.2573759158547438, "aux_distill/n_active_tok": 170.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4719298245614035, "calib/avg_num_step_conf": 5.41796875, "calib/ece": 0.2891796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012134502923976604, "calib/mean_conf": 0.0076953125, "calib/mu_c": 0.006842105263157895, "calib/mu_w": 0.008055555555555555, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0076903537322637995, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27508333333333335, "calib/step_q_c_n": 360.0, "calib/step_q_gap": -0.0038816131126256925, "calib/step_q_w": 0.27896494644595904, "calib/step_q_w_n": 1027.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 787.0, "completions/max_terminated_length": 787.0, "completions/mean_length": 235.2734375, "completions/mean_terminated_length": 236.1960906982422, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.15466666666666667, "grad_norm": 0.01416355837136507, "learning_rate": 1.527777777777778e-06, "loss": 0.2677, "num_tokens": 28152077.0, "reward": 0.9980659484863281, "reward_std": 0.014034947380423546, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7031632661819458, "rewards/format_reward_step": 0.99609375, "step": 145 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9093922805041075, "aux_distill/mean_u": 0.2503467252894827, "aux_distill/n_active_tok": 187.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5753644939965694, "calib/avg_num_step_conf": 5.9765625, "calib/ece": 0.165296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015523156089193857, "calib/mean_conf": 0.006578125, "calib/mu_c": 0.007863636363636366, "calib/mu_w": 0.00631132075471698, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007265188158910614, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25840707964601767, "calib/step_q_c_n": 226.0, "calib/step_q_gap": -0.02287896329876765, "calib/step_q_w": 0.2812860429447853, "calib/step_q_w_n": 1304.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 266.078125, "completions/mean_terminated_length": 267.12158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.15573333333333333, "grad_norm": 0.01321055181324482, "learning_rate": 1.5e-06, "loss": 0.2996, "num_tokens": 28327409.0, "reward": 0.9973971843719482, "reward_std": 0.01355915330350399, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8268258571624756, "rewards/format_reward_step": 0.99609375, "step": 146 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8763753715902567, "aux_distill/mean_u": 0.23976348102847864, "aux_distill/n_active_tok": 160.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5509268292682927, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.18992549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0015760975609756091, "calib/mean_conf": 0.0061529411764705885, "calib/mu_c": 0.0074199999999999995, "calib/mu_w": 0.00584390243902439, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.006925099360543684, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.22452479338842976, "calib/step_q_c_n": 242.0, "calib/step_q_gap": -0.025266753201387743, "calib/step_q_w": 0.2497915465898175, "calib/step_q_w_n": 1041.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1806.0, "completions/max_terminated_length": 1806.0, "completions/mean_length": 244.484375, "completions/mean_terminated_length": 244.484375, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.1568, "grad_norm": 0.012709351256489754, "learning_rate": 1.4722222222222225e-06, "loss": 0.285, "num_tokens": 28493677.0, "reward": 0.9955471158027649, "reward_std": 0.018829353153705597, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.8035942316055298, "rewards/format_reward_step": 0.9921875, "step": 147 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8877765387296677, "aux_distill/mean_u": 0.2964384086564609, "aux_distill/n_active_tok": 163.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4963146609488074, "calib/avg_num_step_conf": 5.09375, "calib/ece": 0.35081960784313726, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0003430045564191903, "calib/mean_conf": 0.006043137254901961, "calib/mu_c": 0.006263736263736264, "calib/mu_w": 0.005920731707317074, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007692248578721788, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2919807228915663, "calib/step_q_c_n": 415.0, "calib/step_q_gap": 0.07896497486007026, "calib/step_q_w": 0.21301574803149603, "calib/step_q_w_n": 889.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 973.0, "completions/max_terminated_length": 973.0, "completions/mean_length": 232.33984375, "completions/mean_terminated_length": 233.25099182128906, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.15786666666666666, "grad_norm": 0.012826824560761452, "learning_rate": 1.4444444444444445e-06, "loss": 0.2879, "num_tokens": 28658268.0, "reward": 0.9982725977897644, "reward_std": 0.014345245435833931, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6449828147888184, "rewards/format_reward_step": 0.99609375, "step": 148 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8741916511207819, "aux_distill/mean_u": 0.23303331402852917, "aux_distill/n_active_tok": 177.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5589198036006546, "calib/avg_num_step_conf": 5.81640625, "calib/ece": 0.17889019607843135, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005831628477905061, "calib/mean_conf": 0.008560784313725492, "calib/mu_c": 0.008085106382978725, "calib/mu_w": 0.008668269230769231, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0015686274509803923, "calib/std_conf": 0.02595023955030445, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.277008547008547, "calib/step_q_c_n": 234.0, "calib/step_q_gap": 0.023315319916913546, "calib/step_q_w": 0.25369322709163344, "calib/step_q_w_n": 1255.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1185.0, "completions/max_terminated_length": 1185.0, "completions/mean_length": 260.0, "completions/mean_terminated_length": 261.0196228027344, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.15893333333333334, "grad_norm": 0.014396457001566887, "learning_rate": 1.4166666666666667e-06, "loss": 0.2461, "num_tokens": 28829284.0, "reward": 0.9936124086380005, "reward_std": 0.024772923439741135, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8114436864852905, "rewards/format_reward_step": 0.9921875, "step": 149 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9308585114777088, "aux_distill/mean_u": 0.2825607385398316, "aux_distill/n_active_tok": 161.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5184814814814814, "calib/avg_num_step_conf": 5.40625, "calib/ece": 0.28629019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006411111111111105, "calib/mean_conf": 0.007827450980392158, "calib/mu_c": 0.00828, "calib/mu_w": 0.007638888888888889, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007624112379872847, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26277070063694263, "calib/step_q_c_n": 314.0, "calib/step_q_gap": 0.009795934281802476, "calib/step_q_w": 0.25297476635514016, "calib/step_q_w_n": 1070.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 843.0, "completions/max_terminated_length": 843.0, "completions/mean_length": 222.9921875, "completions/mean_terminated_length": 223.86668395996094, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.16, "grad_norm": 0.013206169940531254, "learning_rate": 1.3888888888888892e-06, "loss": 0.2793, "num_tokens": 28991330.0, "reward": 0.998460054397583, "reward_std": 0.014980091713368893, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.707857608795166, "rewards/format_reward_step": 0.99609375, "step": 150 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9185364572331309, "aux_distill/mean_u": 0.27937537430025083, "aux_distill/n_active_tok": 170.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5205164992826399, "calib/avg_num_step_conf": 5.3359375, "calib/ece": 0.19266015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0011143950263032042, "calib/mean_conf": 0.006558593749999999, "calib/mu_c": 0.007450980392156863, "calib/mu_w": 0.006336585365853659, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007692610367908993, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23504098360655734, "calib/step_q_c_n": 244.0, "calib/step_q_gap": -0.01838593261447652, "calib/step_q_w": 0.25342691622103386, "calib/step_q_w_n": 1122.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 757.0, "completions/max_terminated_length": 757.0, "completions/mean_length": 243.59765625, "completions/mean_terminated_length": 244.55296325683594, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.16106666666666666, "grad_norm": 0.012316438369452953, "learning_rate": 1.3611111111111112e-06, "loss": 0.279, "num_tokens": 29160715.0, "reward": 1.001433253288269, "reward_std": 0.0031007230281829834, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.8036477565765381, "rewards/format_reward_step": 1.0, "step": 151 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9062818996608257, "aux_distill/mean_u": 0.23632444722897225, "aux_distill/n_active_tok": 161.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4878723832528181, "calib/avg_num_step_conf": 5.03515625, "calib/ece": 0.17980392156862746, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00012077294685990288, "calib/mean_conf": 0.008431372549019609, "calib/mu_c": 0.008333333333333333, "calib/mu_w": 0.008454106280193236, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008441853950176418, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2364607843137255, "calib/step_q_c_n": 204.0, "calib/step_q_gap": -0.02490880093973072, "calib/step_q_w": 0.2613695852534562, "calib/step_q_w_n": 1085.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1983.0, "completions/max_terminated_length": 1983.0, "completions/mean_length": 248.84375, "completions/mean_terminated_length": 248.84375, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.16213333333333332, "grad_norm": 0.011517630890011787, "learning_rate": 1.3333333333333334e-06, "loss": 0.3101, "num_tokens": 29329811.0, "reward": 0.9975853562355042, "reward_std": 0.014188062399625778, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8115769624710083, "rewards/format_reward_step": 0.99609375, "step": 152 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.94620705768466, "aux_distill/mean_u": 0.3316092887910609, "aux_distill/n_active_tok": 176.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.474765312761409, "calib/avg_num_step_conf": 5.55859375, "calib/ece": 0.19921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0010484245747746056, "calib/mean_conf": 0.0078125, "calib/mu_c": 0.006981132075471699, "calib/mu_w": 0.008029556650246305, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008518133231524382, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2650373134328358, "calib/step_q_c_n": 268.0, "calib/step_q_gap": -0.020681301285778886, "calib/step_q_w": 0.2857186147186147, "calib/step_q_w_n": 1155.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 688.0, "completions/max_terminated_length": 688.0, "completions/mean_length": 260.65625, "completions/mean_terminated_length": 261.6784362792969, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.1632, "grad_norm": 0.013415186665952206, "learning_rate": 1.3055555555555556e-06, "loss": 0.3176, "num_tokens": 29503859.0, "reward": 1.0013785362243652, "reward_std": 0.0025912360288202763, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7957258224487305, "rewards/format_reward_step": 1.0, "step": 153 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8725715447217226, "aux_distill/mean_u": 0.2504315987939681, "aux_distill/n_active_tok": 153.125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5310153316645807, "calib/avg_num_step_conf": 4.80078125, "calib/ece": 0.25696484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0012237171464330454, "calib/mean_conf": 0.008660156249999999, "calib/mu_c": 0.009558823529411767, "calib/mu_w": 0.008335106382978722, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00777853456478699, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3326111111111111, "calib/step_q_c_n": 306.0, "calib/step_q_gap": 0.02233375466474058, "calib/step_q_w": 0.3102773564463705, "calib/step_q_w_n": 923.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 621.0, "completions/max_terminated_length": 621.0, "completions/mean_length": 225.49609375, "completions/mean_terminated_length": 226.38040161132812, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.16426666666666667, "grad_norm": 0.015053332783281803, "learning_rate": 1.2777777777777779e-06, "loss": 0.2345, "num_tokens": 29666026.0, "reward": 1.0024712085723877, "reward_std": 0.004337704740464687, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7393175959587097, "rewards/format_reward_step": 1.0, "step": 154 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8818569201976061, "aux_distill/mean_u": 0.25869740968154836, "aux_distill/n_active_tok": 157.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5340544871794871, "calib/avg_num_step_conf": 4.93359375, "calib/ece": 0.1779375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 2.5641025641027354e-05, "calib/mean_conf": 0.009562500000000002, "calib/mu_c": 0.009583333333333334, "calib/mu_w": 0.009557692307692307, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010310984737162595, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24661754385964912, "calib/step_q_c_n": 285.0, "calib/step_q_gap": -0.07756446023033042, "calib/step_q_w": 0.32418200408997955, "calib/step_q_w_n": 978.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 755.0, "completions/max_terminated_length": 755.0, "completions/mean_length": 229.77734375, "completions/mean_terminated_length": 230.67845153808594, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.16533333333333333, "grad_norm": 0.015045061707496643, "learning_rate": 1.25e-06, "loss": 0.2287, "num_tokens": 29832065.0, "reward": 1.0016980171203613, "reward_std": 0.0030909671913832426, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8158959746360779, "rewards/format_reward_step": 1.0, "step": 155 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8972544763237238, "aux_distill/mean_u": 0.2473313923428918, "aux_distill/n_active_tok": 164.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46288080449571134, "calib/avg_num_step_conf": 5.1484375, "calib/ece": 0.18242578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0012630385487528342, "calib/mean_conf": 0.00898046875, "calib/mu_c": 0.007959183673469388, "calib/mu_w": 0.009222222222222222, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008474894529153353, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2990153846153846, "calib/step_q_c_n": 260.0, "calib/step_q_gap": -0.009392932964955636, "calib/step_q_w": 0.30840831758034026, "calib/step_q_w_n": 1058.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 549.0, "completions/max_terminated_length": 549.0, "completions/mean_length": 237.28125, "completions/mean_terminated_length": 238.21177673339844, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.1664, "grad_norm": 0.015800490975379944, "learning_rate": 1.2222222222222223e-06, "loss": 0.2672, "num_tokens": 29997569.0, "reward": 1.0014472007751465, "reward_std": 0.0030206539668142796, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.811488151550293, "rewards/format_reward_step": 1.0, "step": 156 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8981580398976803, "aux_distill/mean_u": 0.2928962613172708, "aux_distill/n_active_tok": 163.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5688922500720254, "calib/avg_num_step_conf": 5.14453125, "calib/ece": 0.2927734375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008310285220397583, "calib/mean_conf": 0.011914062500000001, "calib/mu_c": 0.017692307692307695, "calib/mu_w": 0.009382022471910112, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.031459430060732096, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3103519061583578, "calib/step_q_c_n": 341.0, "calib/step_q_gap": 0.019046578289505345, "calib/step_q_w": 0.29130532786885244, "calib/step_q_w_n": 976.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 599.0, "completions/max_terminated_length": 599.0, "completions/mean_length": 230.11328125, "completions/mean_terminated_length": 231.0157012939453, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.16746666666666668, "grad_norm": 0.013263057917356491, "learning_rate": 1.1944444444444446e-06, "loss": 0.2412, "num_tokens": 30160206.0, "reward": 1.0048247575759888, "reward_std": 0.008723842911422253, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7049621343612671, "rewards/format_reward_step": 1.0, "step": 157 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8862754721194506, "aux_distill/mean_u": 0.25307615114231347, "aux_distill/n_active_tok": 149.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4954807542465327, "calib/avg_num_step_conf": 4.66796875, "calib/ece": 0.26209490196078433, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.003050116877045348, "calib/mean_conf": 0.011630588235294118, "calib/mu_c": 0.009405797101449277, "calib/mu_w": 0.012455913978494625, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015686274509803923, "calib/std_conf": 0.026572080394969538, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29868025078369903, "calib/step_q_c_n": 319.0, "calib/step_q_gap": -0.0011962332345658022, "calib/step_q_w": 0.29987648401826483, "calib/step_q_w_n": 876.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 948.0, "completions/max_terminated_length": 948.0, "completions/mean_length": 226.74609375, "completions/mean_terminated_length": 227.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.16853333333333334, "grad_norm": 0.015819691121578217, "learning_rate": 1.1666666666666668e-06, "loss": 0.25, "num_tokens": 30323493.0, "reward": 0.9982098340988159, "reward_std": 0.014394954778254032, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7307947874069214, "rewards/format_reward_step": 0.99609375, "step": 158 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8714629802852869, "aux_distill/mean_u": 0.24025526118135004, "aux_distill/n_active_tok": 165.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4885566188197767, "calib/avg_num_step_conf": 5.1796875, "calib/ece": 0.2496484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006395534290271118, "calib/mean_conf": 0.0094140625, "calib/mu_c": 0.008939393939393941, "calib/mu_w": 0.009578947368421053, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000625, "calib/std_conf": 0.012749403995720495, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26255775577557755, "calib/step_q_c_n": 303.0, "calib/step_q_gap": -0.029500113237130143, "calib/step_q_w": 0.2920578690127077, "calib/step_q_w_n": 1023.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 756.0, "completions/max_terminated_length": 756.0, "completions/mean_length": 231.953125, "completions/mean_terminated_length": 232.86276245117188, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.1696, "grad_norm": 0.01259444747120142, "learning_rate": 1.138888888888889e-06, "loss": 0.2727, "num_tokens": 30487657.0, "reward": 1.0021791458129883, "reward_std": 0.003811280243098736, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.746545672416687, "rewards/format_reward_step": 1.0, "step": 159 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8943109344691038, "aux_distill/mean_u": 0.2432022044371476, "aux_distill/n_active_tok": 161.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46281108597285064, "calib/avg_num_step_conf": 5.046875, "calib/ece": 0.19409375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0016802413273001495, "calib/mean_conf": 0.00903125, "calib/mu_c": 0.007692307692307693, "calib/mu_w": 0.009372549019607842, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008483382193294134, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2612128514056225, "calib/step_q_c_n": 249.0, "calib/step_q_gap": -0.01567305463464591, "calib/step_q_w": 0.2768859060402684, "calib/step_q_w_n": 1043.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 761.0, "completions/max_terminated_length": 761.0, "completions/mean_length": 225.2890625, "completions/mean_terminated_length": 226.1725616455078, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.17066666666666666, "grad_norm": 0.01451224647462368, "learning_rate": 1.111111111111111e-06, "loss": 0.2765, "num_tokens": 30650171.0, "reward": 1.0014857053756714, "reward_std": 0.0025799009017646313, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.7998464703559875, "rewards/format_reward_step": 1.0, "step": 160 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8607595209032297, "aux_distill/mean_u": 0.2243861432103818, "aux_distill/n_active_tok": 153.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5905117721697448, "calib/avg_num_step_conf": 4.80859375, "calib/ece": 0.31149019607843137, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0022212040039475533, "calib/mean_conf": 0.01007843137254902, "calib/mu_c": 0.011585365853658536, "calib/mu_w": 0.009364161849710983, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009579068556083783, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2861194444444444, "calib/step_q_c_n": 360.0, "calib/step_q_gap": -0.02870030297231796, "calib/step_q_w": 0.3148197474167624, "calib/step_q_w_n": 871.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2208.0, "completions/max_terminated_length": 2208.0, "completions/mean_length": 234.79296875, "completions/mean_terminated_length": 234.79296875, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "epoch": 0.17173333333333332, "grad_norm": 0.014872077852487564, "learning_rate": 1.0833333333333335e-06, "loss": 0.3085, "num_tokens": 30814198.0, "reward": 0.999708354473114, "reward_std": 0.015861758962273598, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6830105781555176, "rewards/format_reward_step": 0.99609375, "step": 161 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8608557693660259, "aux_distill/mean_u": 0.23922221231065072, "aux_distill/n_active_tok": 161.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5466666666666667, "calib/avg_num_step_conf": 5.05859375, "calib/ece": 0.34247960784313725, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003579595959595958, "calib/mean_conf": 0.010461568627450981, "calib/mu_c": 0.012777777777777777, "calib/mu_w": 0.009198181818181819, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.020830856013262875, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2778433179723502, "calib/step_q_c_n": 434.0, "calib/step_q_gap": 0.022501854557716028, "calib/step_q_w": 0.25534146341463415, "calib/step_q_w_n": 861.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 246.33984375, "completions/mean_terminated_length": 246.33984375, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.1728, "grad_norm": 0.014128386043012142, "learning_rate": 1.0555555555555557e-06, "loss": 0.316, "num_tokens": 30981405.0, "reward": 1.0003153085708618, "reward_std": 0.018365781754255295, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6529743671417236, "rewards/format_reward_step": 0.99609375, "step": 162 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.907149713486433, "aux_distill/mean_u": 0.2809183020412603, "aux_distill/n_active_tok": 158.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5661311414160284, "calib/avg_num_step_conf": 4.95703125, "calib/ece": 0.1992392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001162245469830004, "calib/mean_conf": 0.009701960784313726, "calib/mu_c": 0.010622641509433964, "calib/mu_w": 0.00946039603960396, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005490196078431374, "calib/std_conf": 0.01125590895269695, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2518829268292683, "calib/step_q_c_n": 205.0, "calib/step_q_gap": 0.016614129836787117, "calib/step_q_w": 0.2352687969924812, "calib/step_q_w_n": 1064.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2564.0, "completions/max_terminated_length": 2564.0, "completions/mean_length": 241.34765625, "completions/mean_terminated_length": 241.34765625, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "epoch": 0.17386666666666667, "grad_norm": 0.013407212682068348, "learning_rate": 1.0277777777777777e-06, "loss": 0.3076, "num_tokens": 31148022.0, "reward": 0.9981828927993774, "reward_std": 0.014493662863969803, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7932409644126892, "rewards/format_reward_step": 0.99609375, "step": 163 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9057147167623043, "aux_distill/mean_u": 0.23854662639106458, "aux_distill/n_active_tok": 158.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5615351629502573, "calib/avg_num_step_conf": 4.99609375, "calib/ece": 0.16271093749999999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0023816466552315646, "calib/mean_conf": 0.0091640625, "calib/mu_c": 0.011136363636363639, "calib/mu_w": 0.008754716981132074, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009547805166429285, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2523084577114428, "calib/step_q_c_n": 201.0, "calib/step_q_gap": 0.00016652821237039817, "calib/step_q_w": 0.2521419294990724, "calib/step_q_w_n": 1078.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 733.0, "completions/max_terminated_length": 733.0, "completions/mean_length": 238.85546875, "completions/mean_terminated_length": 239.79217529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.17493333333333333, "grad_norm": 0.016263170167803764, "learning_rate": 1.0000000000000002e-06, "loss": 0.2661, "num_tokens": 31315305.0, "reward": 1.001826524734497, "reward_std": 0.0030358254443854094, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8317779898643494, "rewards/format_reward_step": 1.0, "step": 164 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9077084865421057, "aux_distill/mean_u": 0.28237916739274693, "aux_distill/n_active_tok": 160.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4865963011395479, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.19998823529411766, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00015028955725761208, "calib/mean_conf": 0.007854901960784314, "calib/mu_c": 0.007735849056603774, "calib/mu_w": 0.007886138613861386, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0073392929542851525, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.20630434782608698, "calib/step_q_c_n": 253.0, "calib/step_q_gap": -0.04420050654284502, "calib/step_q_w": 0.250504854368932, "calib/step_q_w_n": 1030.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 749.0, "completions/max_terminated_length": 749.0, "completions/mean_length": 240.2578125, "completions/mean_terminated_length": 241.20001220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.176, "grad_norm": 0.015873249620199203, "learning_rate": 9.722222222222224e-07, "loss": 0.2549, "num_tokens": 31482387.0, "reward": 0.9976377487182617, "reward_std": 0.0134980957955122, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.7921505570411682, "rewards/format_reward_step": 0.99609375, "step": 165 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9185799043625593, "aux_distill/mean_u": 0.30033257046494144, "aux_distill/n_active_tok": 190.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5301592161665646, "calib/avg_num_step_conf": 5.9453125, "calib/ece": 0.26993333333333336, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001300750153092469, "calib/mean_conf": 0.008498039215686273, "calib/mu_c": 0.009436619718309862, "calib/mu_w": 0.008135869565217392, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007924583002478301, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27285714285714285, "calib/step_q_c_n": 392.0, "calib/step_q_gap": -0.01204905183312266, "calib/step_q_w": 0.2849061946902655, "calib/step_q_w_n": 1130.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1515.0, "completions/max_terminated_length": 1515.0, "completions/mean_length": 284.56640625, "completions/mean_terminated_length": 285.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.17706666666666668, "grad_norm": 0.015122036449611187, "learning_rate": 9.444444444444445e-07, "loss": 0.2623, "num_tokens": 31661420.0, "reward": 0.998643696308136, "reward_std": 0.014669304713606834, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.723849892616272, "rewards/format_reward_step": 0.99609375, "step": 166 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9328858200460672, "aux_distill/mean_u": 0.30078049686394437, "aux_distill/n_active_tok": 165.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48206477732793523, "calib/avg_num_step_conf": 5.20703125, "calib/ece": 0.24657254901960782, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004421052631578947, "calib/mean_conf": 0.008329411764705882, "calib/mu_c": 0.008, "calib/mu_w": 0.008442105263157895, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008261493339766052, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2694563953488372, "calib/step_q_c_n": 344.0, "calib/step_q_gap": 0.02055042972699697, "calib/step_q_w": 0.24890596562184022, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 858.0, "completions/max_terminated_length": 858.0, "completions/mean_length": 245.75390625, "completions/mean_terminated_length": 246.71766662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.17813333333333334, "grad_norm": 0.01606198400259018, "learning_rate": 9.166666666666666e-07, "loss": 0.2581, "num_tokens": 31829941.0, "reward": 0.9980564117431641, "reward_std": 0.014442476443946362, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7461129426956177, "rewards/format_reward_step": 0.99609375, "step": 167 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9054408930242062, "aux_distill/mean_u": 0.28933193706955174, "aux_distill/n_active_tok": 154.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5035913806863528, "calib/avg_num_step_conf": 4.85546875, "calib/ece": 0.29549218749999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.005436987593412176, "calib/mean_conf": 0.0126328125, "calib/mu_c": 0.00883116883116883, "calib/mu_w": 0.014268156424581006, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003671875, "calib/std_conf": 0.05860738955831289, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22799168975069253, "calib/step_q_c_n": 361.0, "calib/step_q_gap": -0.03408654154182447, "calib/step_q_w": 0.262078231292517, "calib/step_q_w_n": 882.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 633.0, "completions/max_terminated_length": 633.0, "completions/mean_length": 235.59375, "completions/mean_terminated_length": 236.5176544189453, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.1792, "grad_norm": 0.016619674861431122, "learning_rate": 8.88888888888889e-07, "loss": 0.2645, "num_tokens": 31994925.0, "reward": 1.000859022140503, "reward_std": 0.008521142415702343, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7009367942810059, "rewards/format_reward_step": 1.0, "step": 168 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.887091588228941, "aux_distill/mean_u": 0.2387504252351404, "aux_distill/n_active_tok": 168.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5559084071536019, "calib/avg_num_step_conf": 5.2578125, "calib/ece": 0.2358832889534415, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0013826844096682719, "calib/mean_conf": 0.011175534575970305, "calib/mu_c": 0.010129032258064517, "calib/mu_w": 0.01151171666773279, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00196078431372549, "calib/std_conf": 0.03209480971062626, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2120066889632107, "calib/step_q_c_n": 299.0, "calib/step_q_gap": -0.029857868111700853, "calib/step_q_w": 0.24186455707491156, "calib/step_q_w_n": 1047.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2568.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 266.71875, "completions/mean_terminated_length": 266.71875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.18026666666666666, "grad_norm": 0.013696101494133472, "learning_rate": 8.611111111111112e-07, "loss": 0.2917, "num_tokens": 32167389.0, "reward": 0.9940655827522278, "reward_std": 0.019284099340438843, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7537561655044556, "rewards/format_reward_step": 0.9921875, "step": 169 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8750657048076391, "aux_distill/mean_u": 0.25825254583750035, "aux_distill/n_active_tok": 182.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5602652825836216, "calib/avg_num_step_conf": 5.69140625, "calib/ece": 0.18969411764705885, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00228921568627451, "calib/mean_conf": 0.010305882352941177, "calib/mu_c": 0.012137254901960784, "calib/mu_w": 0.009848039215686274, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009186123630711062, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3257341772151899, "calib/step_q_c_n": 316.0, "calib/step_q_gap": 0.0502609081529638, "calib/step_q_w": 0.2754732690622261, "calib/step_q_w_n": 1141.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1676.0, "completions/max_terminated_length": 1676.0, "completions/mean_length": 275.77734375, "completions/mean_terminated_length": 275.77734375, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "epoch": 0.18133333333333335, "grad_norm": 0.012967259623110294, "learning_rate": 8.333333333333333e-07, "loss": 0.2707, "num_tokens": 32342140.0, "reward": 0.9984167814254761, "reward_std": 0.014727575704455376, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.8015210628509521, "rewards/format_reward_step": 0.99609375, "step": 170 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8613785710185766, "aux_distill/mean_u": 0.24564890563767686, "aux_distill/n_active_tok": 150.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5454712137880455, "calib/avg_num_step_conf": 4.72265625, "calib/ece": 0.1968359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.020383204987165386, "calib/mean_conf": 0.014101562500000001, "calib/mu_c": 0.030185185185185186, "calib/mu_w": 0.009801980198019802, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.062456027011478996, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2682475247524752, "calib/step_q_c_n": 202.0, "calib/step_q_gap": 0.008543651862703616, "calib/step_q_w": 0.2597038728897716, "calib/step_q_w_n": 1007.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 700.0, "completions/max_terminated_length": 700.0, "completions/mean_length": 236.4765625, "completions/mean_terminated_length": 237.4039306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.1824, "grad_norm": 0.014070946723222733, "learning_rate": 8.055555555555557e-07, "loss": 0.2691, "num_tokens": 32509574.0, "reward": 1.0043174028396606, "reward_std": 0.009339498355984688, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.7976972460746765, "rewards/format_reward_step": 1.0, "step": 171 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8902777750045061, "aux_distill/mean_u": 0.22226979449947362, "aux_distill/n_active_tok": 158.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.46706539074960124, "calib/avg_num_step_conf": 5.00390625, "calib/ece": 0.2478359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007850079744816576, "calib/mean_conf": 0.009976562500000001, "calib/mu_c": 0.009393939393939395, "calib/mu_w": 0.010178947368421053, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009167678314796705, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2509805194805195, "calib/step_q_c_n": 308.0, "calib/step_q_gap": -0.022046201999439352, "calib/step_q_w": 0.2730267214799589, "calib/step_q_w_n": 973.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 786.0, "completions/max_terminated_length": 786.0, "completions/mean_length": 238.98828125, "completions/mean_terminated_length": 239.92550659179688, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.18346666666666667, "grad_norm": 0.016252703964710236, "learning_rate": 7.777777777777779e-07, "loss": 0.2919, "num_tokens": 32674107.0, "reward": 1.0023300647735596, "reward_std": 0.003432949772104621, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7468476295471191, "rewards/format_reward_step": 1.0, "step": 172 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8446943629533052, "aux_distill/mean_u": 0.2223728193424118, "aux_distill/n_active_tok": 162.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5111847199518197, "calib/avg_num_step_conf": 5.09375, "calib/ece": 0.21993359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001661275058074509, "calib/mean_conf": 0.01053515625, "calib/mu_c": 0.011813559322033899, "calib/mu_w": 0.01015228426395939, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009616815431216612, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2718935483870968, "calib/step_q_c_n": 310.0, "calib/step_q_gap": -0.0031336145907704394, "calib/step_q_w": 0.2750271629778672, "calib/step_q_w_n": 994.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 622.0, "completions/max_terminated_length": 622.0, "completions/mean_length": 242.828125, "completions/mean_terminated_length": 243.78041076660156, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.18453333333333333, "grad_norm": 0.014125216752290726, "learning_rate": 7.5e-07, "loss": 0.2574, "num_tokens": 32839431.0, "reward": 1.0026209354400635, "reward_std": 0.004656236618757248, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.774773120880127, "rewards/format_reward_step": 1.0, "step": 173 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8671461902558804, "aux_distill/mean_u": 0.2609641669285706, "aux_distill/n_active_tok": 158.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48767991239048814, "calib/avg_num_step_conf": 5.0, "calib/ece": 0.2551796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008066332916145218, "calib/mean_conf": 0.0104453125, "calib/mu_c": 0.009852941176470587, "calib/mu_w": 0.010659574468085109, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009069875097119241, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2377955271565495, "calib/step_q_c_n": 313.0, "calib/step_q_gap": -0.07030788545978972, "calib/step_q_w": 0.3081034126163392, "calib/step_q_w_n": 967.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 928.0, "completions/max_terminated_length": 928.0, "completions/mean_length": 246.46875, "completions/mean_terminated_length": 247.435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.1856, "grad_norm": 0.015688760206103325, "learning_rate": 7.222222222222222e-07, "loss": 0.2571, "num_tokens": 33006759.0, "reward": 1.0025215148925781, "reward_std": 0.00413602776825428, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7394180297851562, "rewards/format_reward_step": 1.0, "step": 174 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8709164746105671, "aux_distill/mean_u": 0.245912652958161, "aux_distill/n_active_tok": 157.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5352510808114399, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.232416015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007272198204190238, "calib/mean_conf": 0.009771484375, "calib/mu_c": 0.010322580645161292, "calib/mu_w": 0.009595360824742268, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007668391351621986, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27035856573705186, "calib/step_q_c_n": 251.0, "calib/step_q_gap": -0.006758682324963683, "calib/step_q_w": 0.27711724806201554, "calib/step_q_w_n": 1032.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 944.0, "completions/max_terminated_length": 944.0, "completions/mean_length": 239.45703125, "completions/mean_terminated_length": 240.39608764648438, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.18666666666666668, "grad_norm": 0.013851930387318134, "learning_rate": 6.944444444444446e-07, "loss": 0.2803, "num_tokens": 33173884.0, "reward": 1.00242280960083, "reward_std": 0.0037492546252906322, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7626582384109497, "rewards/format_reward_step": 1.0, "step": 175 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9305590968579054, "aux_distill/mean_u": 0.29542864199826796, "aux_distill/n_active_tok": 173.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5979081472161056, "calib/avg_num_step_conf": 5.421875, "calib/ece": 0.25588235294117645, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0033422459893048123, "calib/mean_conf": 0.010784313725490196, "calib/mu_c": 0.013235294117647059, "calib/mu_w": 0.009893048128342246, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008953723566511192, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22322033898305083, "calib/step_q_c_n": 295.0, "calib/step_q_gap": -0.039495123048056247, "calib/step_q_w": 0.26271546203110707, "calib/step_q_w_n": 1093.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1004.0, "completions/max_terminated_length": 1004.0, "completions/mean_length": 256.49609375, "completions/mean_terminated_length": 257.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.18773333333333334, "grad_norm": 0.014219074510037899, "learning_rate": 6.666666666666667e-07, "loss": 0.2601, "num_tokens": 33343611.0, "reward": 0.9995114803314209, "reward_std": 0.015234305523335934, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7373043298721313, "rewards/format_reward_step": 0.99609375, "step": 176 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8567609488964081, "aux_distill/mean_u": 0.2489543489347476, "aux_distill/n_active_tok": 152.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47199518196678997, "calib/avg_num_step_conf": 4.84765625, "calib/ece": 0.2191328125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004444635636238498, "calib/mean_conf": 0.0113359375, "calib/mu_c": 0.011677966101694916, "calib/mu_w": 0.011233502538071066, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.015259872492786227, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17995348837209302, "calib/step_q_c_n": 258.0, "calib/step_q_gap": -0.09957255435425488, "calib/step_q_w": 0.2795260427263479, "calib/step_q_w_n": 983.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 915.0, "completions/max_terminated_length": 915.0, "completions/mean_length": 231.8515625, "completions/mean_terminated_length": 232.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.1888, "grad_norm": 0.014605239033699036, "learning_rate": 6.388888888888889e-07, "loss": 0.2796, "num_tokens": 33506797.0, "reward": 1.002510666847229, "reward_std": 0.004579597152769566, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7745527029037476, "rewards/format_reward_step": 1.0, "step": 177 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9091615509241819, "aux_distill/mean_u": 0.29264410281950765, "aux_distill/n_active_tok": 164.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5167112299465241, "calib/avg_num_step_conf": 5.1484375, "calib/ece": 0.2567058823529412, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00045454545454545747, "calib/mean_conf": 0.00996078431372549, "calib/mu_c": 0.010294117647058825, "calib/mu_w": 0.009839572192513368, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008092506299557722, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25118611987381706, "calib/step_q_c_n": 317.0, "calib/step_q_gap": -7.961439191722652e-05, "calib/step_q_w": 0.2512657342657343, "calib/step_q_w_n": 1001.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 255.21875, "completions/mean_terminated_length": 255.21875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.18986666666666666, "grad_norm": 0.013889545574784279, "learning_rate": 6.111111111111112e-07, "loss": 0.3144, "num_tokens": 33678205.0, "reward": 0.9987460374832153, "reward_std": 0.015478258952498436, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7357734441757202, "rewards/format_reward_step": 0.99609375, "step": 178 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.889463035389781, "aux_distill/mean_u": 0.25818936434507933, "aux_distill/n_active_tok": 160.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5686725944744363, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.25311886563527475, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002126533770932748, "calib/mean_conf": 0.00962623240394095, "calib/mu_c": 0.01119402985074627, "calib/mu_w": 0.009067496079813522, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008535101580992532, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26551572327044026, "calib/step_q_c_n": 318.0, "calib/step_q_gap": -0.020559035122322822, "calib/step_q_w": 0.2860747583927631, "calib/step_q_w_n": 968.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 997.0, "completions/max_terminated_length": 997.0, "completions/mean_length": 252.7890625, "completions/mean_terminated_length": 253.78041076660156, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.19093333333333334, "grad_norm": 0.012719987891614437, "learning_rate": 5.833333333333334e-07, "loss": 0.2694, "num_tokens": 33849183.0, "reward": 0.9989409446716309, "reward_std": 0.014448285102844238, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7400695085525513, "rewards/format_reward_step": 0.99609375, "step": 179 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9014210309833288, "aux_distill/mean_u": 0.2916708409763812, "aux_distill/n_active_tok": 158.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4855459970549485, "calib/avg_num_step_conf": 4.97265625, "calib/ece": 0.25894921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011935983879717872, "calib/mean_conf": 0.010582031249999999, "calib/mu_c": 0.009710144927536232, "calib/mu_w": 0.01090374331550802, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008816328366957723, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26728086419753083, "calib/step_q_c_n": 324.0, "calib/step_q_gap": 0.022341559666445493, "calib/step_q_w": 0.24493930453108534, "calib/step_q_w_n": 949.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 776.0, "completions/max_terminated_length": 776.0, "completions/mean_length": 250.1953125, "completions/mean_terminated_length": 251.17648315429688, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.192, "grad_norm": 0.015904264524579048, "learning_rate": 5.555555555555555e-07, "loss": 0.3061, "num_tokens": 34017089.0, "reward": 1.002522349357605, "reward_std": 0.003945849370211363, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.73551344871521, "rewards/format_reward_step": 1.0, "step": 180 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8790353294461966, "aux_distill/mean_u": 0.21719419774970758, "aux_distill/n_active_tok": 149.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4607611160406192, "calib/avg_num_step_conf": 4.65625, "calib/ece": 0.18908554687499998, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": -0.00896292024055999, "calib/mean_conf": 0.016226953125, "calib/mu_c": 0.008979591836734696, "calib/mu_w": 0.017942512077294685, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006953125, "calib/std_conf": 0.07790991452531286, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24873873873873872, "calib/step_q_c_n": 222.0, "calib/step_q_gap": -0.02705940559115813, "calib/step_q_w": 0.27579814432989685, "calib/step_q_w_n": 970.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 564.0, "completions/max_terminated_length": 564.0, "completions/mean_length": 222.2265625, "completions/mean_terminated_length": 223.09805297851562, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.19306666666666666, "grad_norm": 0.013717937283217907, "learning_rate": 5.277777777777779e-07, "loss": 0.2549, "num_tokens": 34180243.0, "reward": 0.9985520839691162, "reward_std": 0.011695407330989838, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8056979775428772, "rewards/format_reward_step": 1.0, "step": 181 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8650721274316311, "aux_distill/mean_u": 0.214282604759036, "aux_distill/n_active_tok": 159.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5106608072916666, "calib/avg_num_step_conf": 5.0078125, "calib/ece": 0.24, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008333333333333335, "calib/mean_conf": 0.010000000000000002, "calib/mu_c": 0.010624999999999999, "calib/mu_w": 0.009791666666666666, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007603453162872775, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24737201365187714, "calib/step_q_c_n": 293.0, "calib/step_q_gap": -0.039943840746505116, "calib/step_q_w": 0.28731585439838225, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 679.0, "completions/max_terminated_length": 679.0, "completions/mean_length": 247.671875, "completions/mean_terminated_length": 248.64315795898438, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.19413333333333332, "grad_norm": 0.01505859661847353, "learning_rate": 5.000000000000001e-07, "loss": 0.2828, "num_tokens": 34349807.0, "reward": 1.002577304840088, "reward_std": 0.004599182400852442, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7551547288894653, "rewards/format_reward_step": 1.0, "step": 182 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8601448759436607, "aux_distill/mean_u": 0.24111330375244838, "aux_distill/n_active_tok": 154.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4941244239631336, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.2634375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00039324116743471964, "calib/mean_conf": 0.01, "calib/mu_c": 0.010285714285714289, "calib/mu_w": 0.009892473118279569, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00960143218483576, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22008132530120483, "calib/step_q_c_n": 332.0, "calib/step_q_gap": -0.026885815991237655, "calib/step_q_w": 0.2469671412924425, "calib/step_q_w_n": 913.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 902.0, "completions/max_terminated_length": 902.0, "completions/mean_length": 229.36328125, "completions/mean_terminated_length": 230.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.1952, "grad_norm": 0.016035255044698715, "learning_rate": 4.7222222222222226e-07, "loss": 0.2392, "num_tokens": 34515204.0, "reward": 1.002716302871704, "reward_std": 0.00421358086168766, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7319953441619873, "rewards/format_reward_step": 1.0, "step": 183 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.920300142839551, "aux_distill/mean_u": 0.27552763185378215, "aux_distill/n_active_tok": 155.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5023314790575917, "calib/avg_num_step_conf": 4.85546875, "calib/ece": 0.24110196078431373, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00037090968586387524, "calib/mean_conf": 0.00987843137254902, "calib/mu_c": 0.01015625, "calib/mu_w": 0.009785340314136125, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008278777823162683, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24364912280701753, "calib/step_q_c_n": 285.0, "calib/step_q_gap": -0.03158469765227265, "calib/step_q_w": 0.2752338204592902, "calib/step_q_w_n": 958.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 733.0, "completions/max_terminated_length": 733.0, "completions/mean_length": 240.50390625, "completions/mean_terminated_length": 241.4470672607422, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.19626666666666667, "grad_norm": 0.014850256964564323, "learning_rate": 4.444444444444445e-07, "loss": 0.2515, "num_tokens": 34682053.0, "reward": 0.9985500574111938, "reward_std": 0.015233214944601059, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7510063648223877, "rewards/format_reward_step": 0.99609375, "step": 184 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8545205853879452, "aux_distill/mean_u": 0.23646542383363647, "aux_distill/n_active_tok": 164.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5184684684684685, "calib/avg_num_step_conf": 5.234375, "calib/ece": 0.28124015748031495, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000375975975975976, "calib/mean_conf": 0.010098425196850395, "calib/mu_c": 0.010364864864864865, "calib/mu_w": 0.00998888888888889, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.007542671097414667, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3316076923076923, "calib/step_q_c_n": 390.0, "calib/step_q_gap": 0.08417506072874495, "calib/step_q_w": 0.24743263157894738, "calib/step_q_w_n": 950.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2685.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 256.17578125, "completions/mean_terminated_length": 257.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.19733333333333333, "grad_norm": 0.012667279690504074, "learning_rate": 4.1666666666666667e-07, "loss": 0.2685, "num_tokens": 34854554.0, "reward": 0.9951047897338867, "reward_std": 0.02564604952931404, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.7089595794677734, "rewards/format_reward_step": 0.9921875, "step": 185 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9243546165525913, "aux_distill/mean_u": 0.2849980260621753, "aux_distill/n_active_tok": 165.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5357444651005577, "calib/avg_num_step_conf": 5.16796875, "calib/ece": 0.22968627450980394, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004030758830488434, "calib/mean_conf": 0.009529411764705882, "calib/mu_c": 0.009836065573770493, "calib/mu_w": 0.00943298969072165, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008664448300951515, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2763677685950413, "calib/step_q_c_n": 242.0, "calib/step_q_gap": 0.03412355027866759, "calib/step_q_w": 0.24224421831637372, "calib/step_q_w_n": 1081.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 928.0, "completions/max_terminated_length": 928.0, "completions/mean_length": 260.7578125, "completions/mean_terminated_length": 261.7803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.1984, "grad_norm": 0.014339666813611984, "learning_rate": 3.8888888888888895e-07, "loss": 0.3138, "num_tokens": 35026348.0, "reward": 0.9983549118041992, "reward_std": 0.01398992445319891, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7623347640037537, "rewards/format_reward_step": 0.99609375, "step": 186 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8787559699267149, "aux_distill/mean_u": 0.23315337709486664, "aux_distill/n_active_tok": 152.125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5551409618573797, "calib/avg_num_step_conf": 4.75390625, "calib/ece": 0.20044705882352942, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008891652846876718, "calib/mean_conf": 0.01131764705882353, "calib/mu_c": 0.012018518518518519, "calib/mu_w": 0.011129353233830847, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009315656353000426, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3372490118577075, "calib/step_q_c_n": 253.0, "calib/step_q_gap": 0.05299071310252079, "calib/step_q_w": 0.2842582987551867, "calib/step_q_w_n": 964.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1499.0, "completions/max_terminated_length": 1499.0, "completions/mean_length": 233.046875, "completions/mean_terminated_length": 233.96080017089844, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.19946666666666665, "grad_norm": 0.01473988126963377, "learning_rate": 3.611111111111111e-07, "loss": 0.2494, "num_tokens": 35187552.0, "reward": 0.9985218644142151, "reward_std": 0.0148072000592947, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.790012538433075, "rewards/format_reward_step": 0.99609375, "step": 187 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9031260069459677, "aux_distill/mean_u": 0.26083758666871576, "aux_distill/n_active_tok": 153.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5989276703111859, "calib/avg_num_step_conf": 4.828125, "calib/ece": 0.3093046875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003002803476310628, "calib/mean_conf": 0.0110078125, "calib/mu_c": 0.01304878048780488, "calib/mu_w": 0.010045977011494251, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009012141197564749, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27171988795518204, "calib/step_q_c_n": 357.0, "calib/step_q_gap": 0.03275856827372586, "calib/step_q_w": 0.23896131968145617, "calib/step_q_w_n": 879.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 235.7421875, "completions/mean_terminated_length": 236.66668701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.20053333333333334, "grad_norm": 0.015368984080851078, "learning_rate": 3.3333333333333335e-07, "loss": 0.2734, "num_tokens": 35351974.0, "reward": 1.0040783882141113, "reward_std": 0.005144909024238586, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.687844455242157, "rewards/format_reward_step": 1.0, "step": 188 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8606499591842294, "aux_distill/mean_u": 0.24700294945643025, "aux_distill/n_active_tok": 164.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5778454510848877, "calib/avg_num_step_conf": 5.1953125, "calib/ece": 0.2675390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0026090597639893427, "calib/mean_conf": 0.0098046875, "calib/mu_c": 0.011690140845070423, "calib/mu_w": 0.00908108108108108, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007927955475867896, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2618125, "calib/step_q_c_n": 320.0, "calib/step_q_gap": 0.009662995049504963, "calib/step_q_w": 0.25214950495049504, "calib/step_q_w_n": 1010.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 734.0, "completions/max_terminated_length": 734.0, "completions/mean_length": 245.53515625, "completions/mean_terminated_length": 246.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.2016, "grad_norm": 0.013666624203324318, "learning_rate": 3.055555555555556e-07, "loss": 0.2603, "num_tokens": 35522599.0, "reward": 1.0031626224517822, "reward_std": 0.0042640105821192265, "rewards/accuracy_reward_step": 0.27734375, "rewards/final_brier_reward_step": 0.7289816737174988, "rewards/format_reward_step": 1.0, "step": 189 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8491978226229548, "aux_distill/mean_u": 0.2470079832588207, "aux_distill/n_active_tok": 177.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5465555206039635, "calib/avg_num_step_conf": 5.53125, "calib/ece": 0.256035294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005628342245989348, "calib/mean_conf": 0.01063137254901961, "calib/mu_c": 0.011044117647058826, "calib/mu_w": 0.010481283422459891, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009472219240061723, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24370666666666663, "calib/step_q_c_n": 375.0, "calib/step_q_gap": -0.0425815177713737, "calib/step_q_w": 0.2862881844380403, "calib/step_q_w_n": 1041.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 859.0, "completions/max_terminated_length": 859.0, "completions/mean_length": 256.40625, "completions/mean_terminated_length": 257.4117736816406, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.20266666666666666, "grad_norm": 0.014104792848229408, "learning_rate": 2.7777777777777776e-07, "loss": 0.2561, "num_tokens": 35693847.0, "reward": 0.9989263415336609, "reward_std": 0.015147150494158268, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7361339330673218, "rewards/format_reward_step": 0.99609375, "step": 190 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8412365037947893, "aux_distill/mean_u": 0.20567782297729037, "aux_distill/n_active_tok": 158.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5478466910542068, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.231796875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014000665114732302, "calib/mean_conf": 0.010390624999999999, "calib/mu_c": 0.011451612903225808, "calib/mu_w": 0.010051546391752578, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008043081630157373, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2528014184397163, "calib/step_q_c_n": 282.0, "calib/step_q_gap": -0.007035234946737856, "calib/step_q_w": 0.2598366533864542, "calib/step_q_w_n": 1004.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1019.0, "completions/max_terminated_length": 1019.0, "completions/mean_length": 231.63671875, "completions/mean_terminated_length": 232.54510498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.20373333333333332, "grad_norm": 0.01434285193681717, "learning_rate": 2.5000000000000004e-07, "loss": 0.2495, "num_tokens": 35857314.0, "reward": 1.0026870965957642, "reward_std": 0.004015287384390831, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.7631866931915283, "rewards/format_reward_step": 1.0, "step": 191 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8681392464786768, "aux_distill/mean_u": 0.22151961991092448, "aux_distill/n_active_tok": 147.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5210526315789474, "calib/avg_num_step_conf": 4.625, "calib/ece": 0.24690286144578313, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008566783881939263, "calib/mean_conf": 0.010909638554216866, "calib/mu_c": 0.011545454545454546, "calib/mu_w": 0.01068877615726062, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008304116287721583, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23784768211920534, "calib/step_q_c_n": 302.0, "calib/step_q_gap": -0.0034210253637878274, "calib/step_q_w": 0.24126870748299317, "calib/step_q_w_n": 882.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 967.0, "completions/max_terminated_length": 967.0, "completions/mean_length": 235.9375, "completions/mean_terminated_length": 236.86276245117188, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.2048, "grad_norm": 0.015469144098460674, "learning_rate": 2.2222222222222224e-07, "loss": 0.2585, "num_tokens": 36022690.0, "reward": 1.0028825998306274, "reward_std": 0.004011563956737518, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7479526400566101, "rewards/format_reward_step": 1.0, "step": 192 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9071143828332424, "aux_distill/mean_u": 0.25343324748730706, "aux_distill/n_active_tok": 157.25, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.531650641025641, "calib/avg_num_step_conf": 4.96875, "calib/ece": 0.1779375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002820512820512818, "calib/mean_conf": 0.009562500000000002, "calib/mu_c": 0.009791666666666666, "calib/mu_w": 0.009509615384615384, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008472151069828723, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20765566037735847, "calib/step_q_c_n": 212.0, "calib/step_q_gap": -0.03716226415094348, "calib/step_q_w": 0.24481792452830195, "calib/step_q_w_n": 1060.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 797.0, "completions/max_terminated_length": 797.0, "completions/mean_length": 246.375, "completions/mean_terminated_length": 247.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.20586666666666667, "grad_norm": 0.013341576792299747, "learning_rate": 1.9444444444444447e-07, "loss": 0.2691, "num_tokens": 36191474.0, "reward": 1.0017542839050293, "reward_std": 0.002907798858359456, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.8160086870193481, "rewards/format_reward_step": 1.0, "step": 193 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8788082543760538, "aux_distill/mean_u": 0.25585743095938557, "aux_distill/n_active_tok": 150.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4472084024322831, "calib/avg_num_step_conf": 4.7421875, "calib/ece": 0.2514921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0013176972281449904, "calib/mean_conf": 0.0102265625, "calib/mu_c": 0.009253731343283582, "calib/mu_w": 0.010571428571428572, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007982123428862382, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23755555555555555, "calib/step_q_c_n": 315.0, "calib/step_q_gap": -0.026266468916079616, "calib/step_q_w": 0.26382202447163516, "calib/step_q_w_n": 899.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 608.0, "completions/max_terminated_length": 608.0, "completions/mean_length": 231.01953125, "completions/mean_terminated_length": 231.92550659179688, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.20693333333333333, "grad_norm": 0.015607761219143867, "learning_rate": 1.6666666666666668e-07, "loss": 0.2627, "num_tokens": 36356559.0, "reward": 1.0023376941680908, "reward_std": 0.004235986620187759, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7429567575454712, "rewards/format_reward_step": 1.0, "step": 194 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9156402302905917, "aux_distill/mean_u": 0.2884676813592923, "aux_distill/n_active_tok": 154.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4467803587180241, "calib/avg_num_step_conf": 4.83203125, "calib/ece": 0.2919058823529412, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.006901940605704204, "calib/mean_conf": 0.013976470588235295, "calib/mu_c": 0.009131578947368421, "calib/mu_w": 0.016033519553072625, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00392156862745098, "calib/std_conf": 0.062428118941724314, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2698305084745763, "calib/step_q_c_n": 354.0, "calib/step_q_gap": 0.020102309154078013, "calib/step_q_w": 0.24972819932049828, "calib/step_q_w_n": 883.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2493.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 261.703125, "completions/mean_terminated_length": 261.703125, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.208, "grad_norm": 0.01972484402358532, "learning_rate": 1.3888888888888888e-07, "loss": 0.2632, "num_tokens": 36529539.0, "reward": 0.9967663288116455, "reward_std": 0.020737262442708015, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7005640268325806, "rewards/format_reward_step": 0.99609375, "step": 195 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.9041774030774832, "aux_distill/mean_u": 0.2440162551747133, "aux_distill/n_active_tok": 149.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5318165784832452, "calib/avg_num_step_conf": 4.7421875, "calib/ece": 0.30703515625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001443738977072312, "calib/mean_conf": 0.00937109375, "calib/mu_c": 0.010358024691358025, "calib/mu_w": 0.008914285714285713, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007043321885212327, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26571623931623933, "calib/step_q_c_n": 351.0, "calib/step_q_gap": 0.008513458319715561, "calib/step_q_w": 0.25720278099652377, "calib/step_q_w_n": 863.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 555.0, "completions/max_terminated_length": 555.0, "completions/mean_length": 222.2421875, "completions/mean_terminated_length": 223.11373901367188, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.20906666666666668, "grad_norm": 0.014004452154040337, "learning_rate": 1.1111111111111112e-07, "loss": 0.2739, "num_tokens": 36688977.0, "reward": 1.0032086372375488, "reward_std": 0.004075036384165287, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6900110244750977, "rewards/format_reward_step": 1.0, "step": 196 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.932471988722682, "aux_distill/mean_u": 0.3058232737935441, "aux_distill/n_active_tok": 152.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.538421052631579, "calib/avg_num_step_conf": 4.76171875, "calib/ece": 0.2446549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001113765182186234, "calib/mean_conf": 0.010247058823529411, "calib/mu_c": 0.011076923076923076, "calib/mu_w": 0.009963157894736842, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.008339237585435717, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2647366666666666, "calib/step_q_c_n": 300.0, "calib/step_q_gap": 0.012387373957199799, "calib/step_q_w": 0.2523492927094668, "calib/step_q_w_n": 919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2308.0, "completions/max_terminated_length": 2308.0, "completions/mean_length": 243.52734375, "completions/mean_terminated_length": 243.52734375, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.21013333333333334, "grad_norm": 0.01720503158867359, "learning_rate": 8.333333333333334e-08, "loss": 0.3229, "num_tokens": 36856376.0, "reward": 0.9988193511962891, "reward_std": 0.014782923273742199, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7476386427879333, "rewards/format_reward_step": 0.99609375, "step": 197 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8915483225136995, "aux_distill/mean_u": 0.22846494568973005, "aux_distill/n_active_tok": 151.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5470469346507971, "calib/avg_num_step_conf": 4.859375, "calib/ece": 0.2742578125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020008982708286573, "calib/mean_conf": 0.0108984375, "calib/mu_c": 0.012328767123287673, "calib/mu_w": 0.010327868852459015, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008859440448391409, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21185573770491803, "calib/step_q_c_n": 305.0, "calib/step_q_gap": -0.042164390090609155, "calib/step_q_w": 0.2540201277955272, "calib/step_q_w_n": 939.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 793.0, "completions/max_terminated_length": 793.0, "completions/mean_length": 223.81640625, "completions/mean_terminated_length": 224.6941375732422, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.2112, "grad_norm": 0.017510568723082542, "learning_rate": 5.555555555555556e-08, "loss": 0.2754, "num_tokens": 37019057.0, "reward": 1.003416895866394, "reward_std": 0.004575707949697971, "rewards/accuracy_reward_step": 0.28515625, "rewards/final_brier_reward_step": 0.7216777205467224, "rewards/format_reward_step": 1.0, "step": 198 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8829764146357775, "aux_distill/mean_u": 0.2720692958175646, "aux_distill/n_active_tok": 155.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4773125996810207, "calib/avg_num_step_conf": 4.91015625, "calib/ece": 0.247203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006169059011164255, "calib/mean_conf": 0.010609375, "calib/mu_c": 0.010151515151515154, "calib/mu_w": 0.01076842105263158, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007613961656678802, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2470553935860058, "calib/step_q_c_n": 343.0, "calib/step_q_gap": 0.014188435161498175, "calib/step_q_w": 0.23286695842450764, "calib/step_q_w_n": 914.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 794.0, "completions/max_terminated_length": 794.0, "completions/mean_length": 237.01171875, "completions/mean_terminated_length": 237.94119262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 55.0, "epoch": 0.21226666666666666, "grad_norm": 0.01510292012244463, "learning_rate": 2.777777777777778e-08, "loss": 0.2639, "num_tokens": 37183932.0, "reward": 1.002531886100769, "reward_std": 0.0038856856990605593, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7472513318061829, "rewards/format_reward_step": 1.0, "step": 199 }, { "aux_distill/lambda": 0.30000000000000004, "aux_distill/loss": 0.8596922922879457, "aux_distill/mean_u": 0.25277857857710645, "aux_distill/n_active_tok": 154.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5928548177083334, "calib/avg_num_step_conf": 4.8828125, "calib/ece": 0.236171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.01760416666666667, "calib/mean_conf": 0.013828125, "calib/mu_c": 0.027031250000000003, "calib/mu_w": 0.009427083333333332, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.062313736118005135, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.18862068965517242, "calib/step_q_c_n": 261.0, "calib/step_q_gap": -0.09334189881803284, "calib/step_q_w": 0.28196258847320527, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 944.0, "completions/max_terminated_length": 944.0, "completions/mean_length": 250.58203125, "completions/mean_terminated_length": 251.56472778320312, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.21333333333333335, "grad_norm": 0.013802732340991497, "learning_rate": 0.0, "loss": 0.2705, "num_tokens": 37356129.0, "reward": 1.004720687866211, "reward_std": 0.008684433996677399, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.7594413757324219, "rewards/format_reward_step": 1.0, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.2853034897893667, "train_runtime": 14810.4923, "train_samples_per_second": 3.457, "train_steps_per_second": 0.014 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 37356129, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }