{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/final_loss": 0.6349183150700161, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.1, "aux_distill/loss": 0.20483983627387456, "aux_distill/mean_u": 0.31677682190706, "aux_distill/n_active_final_tok": 3.7142857142857144, "aux_distill/n_active_tok": 24.571428571428573, "aux_distill/step_loss": 1.4134800136089325, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "calib/step_q_c": 0.8921052631578947, "calib/step_q_c_n": 19.0, "calib/step_q_gap": 0.19807541241162607, "calib/step_q_w": 0.6940298507462687, "calib/step_q_w_n": 67.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.02741635963320732, "learning_rate": 2.5000000000000004e-07, "loss": 0.121, "num_tokens": 264685.0, "reward": 0.037574999034404755, "reward_std": 0.07449960708618164, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/final_loss": 0.5865676277562192, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.10000000000000003, "aux_distill/loss": 0.1695779765907087, "aux_distill/mean_u": 0.2935626227740425, "aux_distill/n_active_final_tok": 4.631578947368421, "aux_distill/n_active_tok": 28.63157894736842, "aux_distill/step_loss": 1.1092121005058289, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.781, "calib/step_q_c_n": 20.0, "calib/step_q_gap": -0.042553719008264435, "calib/step_q_w": 0.8235537190082645, "calib/step_q_w_n": 121.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.03957248851656914, "learning_rate": 5.000000000000001e-07, "loss": 0.1661, "num_tokens": 533467.0, "reward": 0.07537207007408142, "reward_std": 0.14035090804100037, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/final_loss": 0.4299386143684387, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.09999999999999999, "aux_distill/loss": 0.19136657193303108, "aux_distill/mean_u": 0.31480732275070794, "aux_distill/n_active_final_tok": 3.0, "aux_distill/n_active_tok": 23.666666666666668, "aux_distill/step_loss": 1.483727087577184, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.7407407407407407, "calib/avg_num_step_conf": 0.27734375, "calib/ece": 0.5583333333333332, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.21111111111111114, "calib/mean_conf": 0.8083333333333335, "calib/mu_c": 0.9666666666666667, "calib/mu_w": 0.7555555555555555, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.5583333333333332, "calib/std_conf": 0.3195004781773504, "calib/step_conf_rate": 0.0546875, "calib/step_q_c": 0.6784615384615384, "calib/step_q_c_n": 13.0, "calib/step_q_gap": 0.04570291777188318, "calib/step_q_w": 0.6327586206896553, "calib/step_q_w_n": 58.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 667.97265625, "completions/mean_terminated_length": 743.4826049804688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.020845437422394753, "learning_rate": 7.5e-07, "loss": 0.0895, "num_tokens": 809724.0, "reward": 0.027754880487918854, "reward_std": 0.07302109897136688, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.016447264701128006, "rewards/format_reward_step": 0.02734375, "step": 3 }, { "aux_distill/final_loss": 0.4729306432935927, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.09999999999999999, "aux_distill/loss": 0.16556998011138704, "aux_distill/mean_u": 0.3788802493180593, "aux_distill/n_active_final_tok": 3.111111111111111, "aux_distill/n_active_tok": 30.666666666666668, "aux_distill/step_loss": 1.1827691396077473, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.35000000000000003, "calib/avg_num_step_conf": 0.26953125, "calib/ece": 0.850909090909091, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.7272727272727273, "calib/gap": 0.008999999999999897, "calib/mean_conf": 0.9418181818181819, "calib/mu_c": 0.95, "calib/mu_w": 0.9410000000000001, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.850909090909091, "calib/std_conf": 0.049141388188143925, "calib/step_conf_rate": 0.0390625, "calib/step_q_c": 0.98, "calib/step_q_c_n": 1.0, "calib/step_q_gap": 0.13941176470588246, "calib/step_q_w": 0.8405882352941175, "calib/step_q_w_n": 68.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3071.0, "completions/max_terminated_length": 3071.0, "completions/mean_length": 771.17578125, "completions/mean_terminated_length": 836.5296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.01598433405160904, "learning_rate": 1.0000000000000002e-06, "loss": 0.0348, "num_tokens": 1113313.0, "reward": 0.016414452344179153, "reward_std": 0.04068883880972862, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.005485156085342169, "rewards/format_reward_step": 0.0234375, "step": 4 }, { "aux_distill/final_loss": 0.43701744079589844, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.09999999999999999, "aux_distill/loss": 0.14601918309926987, "aux_distill/mean_u": 0.35960835996630164, "aux_distill/n_active_final_tok": 4.0, "aux_distill/n_active_tok": 28.571428571428573, "aux_distill/step_loss": 1.0231743454933167, "calib/answer_extract_rate": 0.046875, "calib/auroc": 1.0, "calib/avg_num_step_conf": 0.19921875, "calib/ece": 0.6382857142857142, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": 0.05599999999999994, "calib/mean_conf": 0.924, "calib/mu_c": 0.964, "calib/mu_w": 0.908, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.05078125, "calib/nonempty_step_conf_rate": 0.03515625, "calib/pce": 0.6382857142857142, "calib/std_conf": 0.03778132569707647, "calib/step_conf_rate": 0.03515625, "calib/step_q_c": 0.8965, "calib/step_q_c_n": 12.0, "calib/step_q_gap": 0.16291025641025636, "calib/step_q_w": 0.7335897435897436, "calib/step_q_w_n": 39.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3063.0, "completions/max_terminated_length": 3063.0, "completions/mean_length": 738.2734375, "completions/mean_terminated_length": 814.6465454101562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.013780995272099972, "learning_rate": 1.25e-06, "loss": 0.0589, "num_tokens": 1408999.0, "reward": 0.023182764649391174, "reward_std": 0.05281626060605049, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.011209281161427498, "rewards/format_reward_step": 0.02734375, "step": 5 }, { "aux_distill/final_loss": 0.623933769762516, "aux_distill/lambda": 0.10000000000000002, "aux_distill/lambda_final": 0.10000000000000002, "aux_distill/loss": 0.19028319464996457, "aux_distill/mean_u": 0.2834644576625892, "aux_distill/n_active_final_tok": 5.25, "aux_distill/n_active_tok": 30.0, "aux_distill/step_loss": 1.2788981422781944, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.41875, "calib/avg_num_step_conf": 0.46875, "calib/ece": 0.6885714285714284, "calib/final_conf_rate": 0.08203125, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.8095238095238095, "calib/gap": 0.024124999999999952, "calib/mean_conf": 0.9076190476190474, "calib/mu_c": 0.9259999999999999, "calib/mu_w": 0.901875, "calib/nonempty_final_conf_rate": 0.08203125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.6790476190476189, "calib/std_conf": 0.1934130729295343, "calib/step_conf_rate": 0.09375, "calib/step_q_c": 0.7655555555555554, "calib/step_q_c_n": 27.0, "calib/step_q_gap": -0.006508960573476719, "calib/step_q_w": 0.7720645161290322, "calib/step_q_w_n": 93.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 575.43359375, "completions/mean_terminated_length": 621.5653686523438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.03270503506064415, "learning_rate": 1.5e-06, "loss": 0.0907, "num_tokens": 1662262.0, "reward": 0.06276483833789825, "reward_std": 0.1288246214389801, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.027873437851667404, "rewards/format_reward_step": 0.078125, "step": 6 }, { "aux_distill/final_loss": 0.480152342054579, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.10000000000000003, "aux_distill/loss": 0.17669792142179278, "aux_distill/mean_u": 0.3248833079364306, "aux_distill/n_active_final_tok": 4.444444444444445, "aux_distill/n_active_tok": 29.77777777777778, "aux_distill/step_loss": 1.2868268423610263, "calib/answer_extract_rate": 0.125, "calib/auroc": 0.5992063492063492, "calib/avg_num_step_conf": 0.54296875, "calib/ece": 0.6348, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.76, "calib/gap": 0.05682539682539678, "calib/mean_conf": 0.9148, "calib/mu_c": 0.9557142857142856, "calib/mu_w": 0.8988888888888888, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.140625, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6348, "calib/std_conf": 0.10564544476691837, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.8315384615384614, "calib/step_q_c_n": 26.0, "calib/step_q_gap": 0.04466346153846146, "calib/step_q_w": 0.786875, "calib/step_q_w_n": 112.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 700.63671875, "completions/mean_terminated_length": 769.7982788085938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.030571160838007927, "learning_rate": 1.75e-06, "loss": 0.1156, "num_tokens": 1949049.0, "reward": 0.06588749587535858, "reward_std": 0.1118675172328949, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.03021249920129776, "rewards/format_reward_step": 0.0703125, "step": 7 }, { "aux_distill/final_loss": 0.5085558457808061, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.09999999999999999, "aux_distill/loss": 0.19147660447792572, "aux_distill/mean_u": 0.3443807402134946, "aux_distill/n_active_final_tok": 3.6363636363636362, "aux_distill/n_active_tok": 24.727272727272727, "aux_distill/step_loss": 1.4062101624228738, "calib/answer_extract_rate": 0.0859375, "calib/auroc": 0.36363636363636365, "calib/avg_num_step_conf": 0.2734375, "calib/ece": 0.696153846153846, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.6923076923076923, "calib/gap": 0.06500000000000006, "calib/mean_conf": 0.8500000000000001, "calib/mu_c": 0.905, "calib/mu_w": 0.84, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.696153846153846, "calib/std_conf": 0.2605910441816685, "calib/step_conf_rate": 0.0625, "calib/step_q_c": 0.7890909090909091, "calib/step_q_c_n": 11.0, "calib/step_q_gap": -0.02260400616332814, "calib/step_q_w": 0.8116949152542372, "calib/step_q_w_n": 59.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 3040.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 613.6875, "completions/mean_terminated_length": 692.0880737304688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.018061090260744095, "learning_rate": 2.0000000000000003e-06, "loss": 0.0622, "num_tokens": 2212665.0, "reward": 0.03364472836256027, "reward_std": 0.06432777643203735, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.016508202999830246, "rewards/format_reward_step": 0.0390625, "step": 8 }, { "aux_distill/final_loss": 0.39097317059834796, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.09999999999999999, "aux_distill/loss": 0.18374727790554365, "aux_distill/mean_u": 0.29662109967481987, "aux_distill/n_active_final_tok": 2.0, "aux_distill/n_active_tok": 15.666666666666666, "aux_distill/step_loss": 1.4464995861053467, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.75, "calib/avg_num_step_conf": 0.18359375, "calib/ece": 0.6016666666666666, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": 0.05249999999999999, "calib/mean_conf": 0.9349999999999999, "calib/mu_c": 0.97, "calib/mu_w": 0.9175, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.6016666666666666, "calib/std_conf": 0.06344288770224757, "calib/step_conf_rate": 0.046875, "calib/step_q_c": 0.8283333333333333, "calib/step_q_c_n": 6.0, "calib/step_q_gap": 0.07784552845528436, "calib/step_q_w": 0.7504878048780489, "calib/step_q_w_n": 41.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3015.0, "completions/max_terminated_length": 3015.0, "completions/mean_length": 616.828125, "completions/mean_terminated_length": 669.1016845703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.021595383062958717, "learning_rate": 2.25e-06, "loss": 0.0848, "num_tokens": 2478109.0, "reward": 0.020723631605505943, "reward_std": 0.05178426578640938, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.010197265073657036, "rewards/format_reward_step": 0.0234375, "step": 9 }, { "aux_distill/final_loss": 0.3737011637006487, "aux_distill/lambda": 0.1, "aux_distill/lambda_final": 0.1, "aux_distill/loss": 0.1851541953427451, "aux_distill/mean_u": 0.21911699772481932, "aux_distill/n_active_final_tok": 2.2857142857142856, "aux_distill/n_active_tok": 19.714285714285715, "aux_distill/step_loss": 1.4778407428945814, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.375, "calib/avg_num_step_conf": 0.26953125, "calib/ece": 0.8366666666666669, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": -0.019999999999999907, "calib/mean_conf": 0.9477777777777779, "calib/mu_c": 0.93, "calib/mu_w": 0.95, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.8366666666666669, "calib/std_conf": 0.03456966485800897, "calib/step_conf_rate": 0.05859375, "calib/step_q_c": 0.8927272727272727, "calib/step_q_c_n": 11.0, "calib/step_q_gap": 0.06815830721003124, "calib/step_q_w": 0.8245689655172415, "calib/step_q_w_n": 58.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3055.0, "completions/max_terminated_length": 3055.0, "completions/mean_length": 652.09765625, "completions/mean_terminated_length": 707.3601684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.021187864243984222, "learning_rate": 2.5e-06, "loss": 0.09, "num_tokens": 2751846.0, "reward": 0.016434960067272186, "reward_std": 0.046485088765621185, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.005526171997189522, "rewards/format_reward_step": 0.01953125, "step": 10 }, { "aux_distill/final_loss": 0.37451341877812927, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.10000000000000003, "aux_distill/loss": 0.17273964635703876, "aux_distill/mean_u": 0.2728877732111283, "aux_distill/n_active_final_tok": 2.9565217391304346, "aux_distill/n_active_tok": 18.434782608695652, "aux_distill/step_loss": 1.3528830020324043, "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.4296875, "calib/ece": 0.8352631578947369, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.7894736842105263, "calib/gap": -0.1570588235294117, "calib/mean_conf": 0.9405263157894738, "calib/mu_c": 0.8, "calib/mu_w": 0.9570588235294117, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.15625, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.8352631578947369, "calib/std_conf": 0.06637006968193183, "calib/step_conf_rate": 0.109375, "calib/step_q_c": 0.81, "calib/step_q_c_n": 9.0, "calib/step_q_gap": 0.028242574257425823, "calib/step_q_w": 0.7817574257425742, "calib/step_q_w_n": 101.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2849.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 678.2109375, "completions/mean_terminated_length": 754.8782348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.030223537236452103, "learning_rate": 2.7500000000000004e-06, "loss": 0.149, "num_tokens": 3029948.0, "reward": 0.04105761647224426, "reward_std": 0.07741484045982361, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.007896484807133675, "rewards/format_reward_step": 0.0625, "step": 11 }, { "aux_distill/final_loss": 0.4656365607914172, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.10000000000000003, "aux_distill/loss": 0.17278049298022924, "aux_distill/mean_u": 0.4000783307520668, "aux_distill/n_active_final_tok": 4.631578947368421, "aux_distill/n_active_tok": 32.421052631578945, "aux_distill/step_loss": 1.262168332150108, "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.30526315789473685, "calib/avg_num_step_conf": 0.609375, "calib/ece": 0.735, "calib/final_conf_rate": 0.09375, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.7916666666666666, "calib/gap": -0.011789473684210572, "calib/mean_conf": 0.9433333333333334, "calib/mu_c": 0.9339999999999999, "calib/mu_w": 0.9457894736842105, "calib/nonempty_final_conf_rate": 0.09375, "calib/nonempty_reasoning_rate": 0.1640625, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.735, "calib/std_conf": 0.07168604389202189, "calib/step_conf_rate": 0.125, "calib/step_q_c": 0.7010000000000001, "calib/step_q_c_n": 20.0, "calib/step_q_gap": -0.11686764705882347, "calib/step_q_w": 0.8178676470588235, "calib/step_q_w_n": 136.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2880.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 533.29296875, "completions/mean_terminated_length": 583.431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.02594468556344509, "learning_rate": 3e-06, "loss": 0.1238, "num_tokens": 3270647.0, "reward": 0.06582578271627426, "reward_std": 0.11194424331188202, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.026182813569903374, "rewards/format_reward_step": 0.0859375, "step": 12 }, { "aux_distill/final_loss": 0.3937861807644367, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.10000000000000003, "aux_distill/loss": 0.16708447442700466, "aux_distill/mean_u": 0.34409069323454206, "aux_distill/n_active_final_tok": 4.0, "aux_distill/n_active_tok": 27.333333333333332, "aux_distill/step_loss": 1.277058516939481, "calib/answer_extract_rate": 0.12890625, "calib/auroc": 0.49038461538461536, "calib/avg_num_step_conf": 0.671875, "calib/ece": 0.72934375, "calib/final_conf_rate": 0.125, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.78125, "calib/gap": 0.042858974358974455, "calib/mean_conf": 0.91684375, "calib/mu_c": 0.9516666666666667, "calib/mu_w": 0.9088076923076922, "calib/nonempty_final_conf_rate": 0.125, "calib/nonempty_reasoning_rate": 0.16796875, "calib/nonempty_step_conf_rate": 0.14453125, "calib/pce": 0.72934375, "calib/std_conf": 0.11660861604503116, "calib/step_conf_rate": 0.14453125, "calib/step_q_c": 0.7872727272727271, "calib/step_q_c_n": 22.0, "calib/step_q_gap": 0.002272727272727093, "calib/step_q_w": 0.785, "calib/step_q_w_n": 150.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2631.0, "completions/max_terminated_length": 2631.0, "completions/mean_length": 626.1328125, "completions/mean_terminated_length": 667.8750610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.029530515894293785, "learning_rate": 3.2500000000000002e-06, "loss": 0.1755, "num_tokens": 3535529.0, "reward": 0.06705722212791443, "reward_std": 0.1322254240512848, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.024739447981119156, "rewards/format_reward_step": 0.0859375, "step": 13 }, { "aux_distill/final_loss": 0.2387990951538086, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.10000000000000003, "aux_distill/loss": 0.13480552989575598, "aux_distill/mean_u": 0.25604527477694694, "aux_distill/n_active_final_tok": 4.222222222222222, "aux_distill/n_active_tok": 34.0, "aux_distill/step_loss": 1.1092561682065327, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.6142857142857143, "calib/avg_num_step_conf": 0.59765625, "calib/ece": 0.5931578947368421, "calib/final_conf_rate": 0.07421875, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.7894736842105263, "calib/gap": 0.059285714285714386, "calib/mean_conf": 0.8563157894736841, "calib/mu_c": 0.9, "calib/mu_w": 0.8407142857142856, "calib/nonempty_final_conf_rate": 0.07421875, "calib/nonempty_reasoning_rate": 0.1328125, "calib/nonempty_step_conf_rate": 0.1015625, "calib/pce": 0.5931578947368421, "calib/std_conf": 0.23414369241570643, "calib/step_conf_rate": 0.1015625, "calib/step_q_c": 0.6892592592592592, "calib/step_q_c_n": 27.0, "calib/step_q_gap": 0.03703465608465617, "calib/step_q_w": 0.6522246031746031, "calib/step_q_w_n": 126.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 650.390625, "completions/mean_terminated_length": 696.6527099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.021831141784787178, "learning_rate": 3.5e-06, "loss": 0.1041, "num_tokens": 3807429.0, "reward": 0.056269921362400055, "reward_std": 0.08187633007764816, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.03050859272480011, "rewards/format_reward_step": 0.05859375, "step": 14 }, { "aux_distill/final_loss": 0.20902437912790398, "aux_distill/lambda": 0.10000000000000003, "aux_distill/lambda_final": 0.10000000000000003, "aux_distill/loss": 0.13648096746519991, "aux_distill/mean_u": 0.28301270779921556, "aux_distill/n_active_final_tok": 3.5789473684210527, "aux_distill/n_active_tok": 26.94736842105263, "aux_distill/step_loss": 1.155785243762167, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.3452380952380953, "calib/avg_num_step_conf": 0.50390625, "calib/ece": 0.6547826086956522, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.43478260869565216, "calib/gap": -0.20214285714285707, "calib/mean_conf": 0.7095652173913044, "calib/mu_c": 0.525, "calib/mu_w": 0.7271428571428571, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.15234375, "calib/nonempty_step_conf_rate": 0.1171875, "calib/pce": 0.6386956521739131, "calib/std_conf": 0.32915541803979576, "calib/step_conf_rate": 0.1171875, "calib/step_q_c": 0.6079999999999999, "calib/step_q_c_n": 5.0, "calib/step_q_gap": 0.03203225806451604, "calib/step_q_w": 0.5759677419354838, "calib/step_q_w_n": 124.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 597.63671875, "completions/mean_terminated_length": 665.1956176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.016, "grad_norm": 0.020071037113666534, "learning_rate": 3.7500000000000005e-06, "loss": 0.0956, "num_tokens": 4068304.0, "reward": 0.05219629406929016, "reward_std": 0.11629487574100494, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.030173826962709427, "rewards/format_reward_step": 0.06640625, "step": 15 }, { "aux_distill/final_loss": 0.22446956237157187, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.09999999999999999, "aux_distill/loss": 0.14110560218493143, "aux_distill/mean_u": 0.47624583883926563, "aux_distill/n_active_final_tok": 2.3333333333333335, "aux_distill/n_active_tok": 21.333333333333332, "aux_distill/step_loss": 1.18658642967542, "calib/answer_extract_rate": 0.04296875, "calib/auroc": 0.5666666666666667, "calib/avg_num_step_conf": 0.25, "calib/ece": 0.5200000000000002, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.0234375, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.1146666666666667, "calib/mean_conf": 0.895, "calib/mu_c": 0.9666666666666667, "calib/mu_w": 0.852, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.5200000000000002, "calib/std_conf": 0.188547076349648, "calib/step_conf_rate": 0.05859375, "calib/step_q_c": 0.8644444444444445, "calib/step_q_c_n": 9.0, "calib/step_q_gap": 0.31748080808080825, "calib/step_q_w": 0.5469636363636362, "calib/step_q_w_n": 55.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 703.5859375, "completions/mean_terminated_length": 783.1217041015625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.013928981497883797, "learning_rate": 4.000000000000001e-06, "loss": 0.0638, "num_tokens": 4357270.0, "reward": 0.025447461754083633, "reward_std": 0.06808701902627945, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.011832421645522118, "rewards/format_reward_step": 0.0234375, "step": 16 }, { "aux_distill/final_loss": 0.2633027576264881, "aux_distill/lambda": 0.10000000000000002, "aux_distill/lambda_final": 0.10000000000000002, "aux_distill/loss": 0.13817484215611503, "aux_distill/mean_u": 0.3446736028406753, "aux_distill/n_active_final_tok": 3.8095238095238093, "aux_distill/n_active_tok": 37.142857142857146, "aux_distill/step_loss": 1.118445634841919, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.8026315789473685, "calib/avg_num_step_conf": 0.765625, "calib/ece": 0.6379999999999999, "calib/final_conf_rate": 0.08984375, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.5217391304347826, "calib/gap": 0.12478947368421045, "calib/mean_conf": 0.8119130434782609, "calib/mu_c": 0.9149999999999999, "calib/mu_w": 0.7902105263157895, "calib/nonempty_final_conf_rate": 0.08984375, "calib/nonempty_reasoning_rate": 0.1484375, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.6379999999999999, "calib/std_conf": 0.24913867502875792, "calib/step_conf_rate": 0.12109375, "calib/step_q_c": 0.4613333333333333, "calib/step_q_c_n": 15.0, "calib/step_q_gap": -0.11110478821362796, "calib/step_q_w": 0.5724381215469613, "calib/step_q_w_n": 181.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 633.1796875, "completions/mean_terminated_length": 704.7564697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.01893424428999424, "learning_rate": 4.25e-06, "loss": 0.0975, "num_tokens": 4622892.0, "reward": 0.05825863778591156, "reward_std": 0.09287722408771515, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.030579781159758568, "rewards/format_reward_step": 0.06640625, "step": 17 }, { "aux_distill/final_loss": 0.19629781896417792, "aux_distill/lambda": 0.09999999999999999, "aux_distill/lambda_final": 0.09999999999999999, "aux_distill/loss": 0.13067916916175323, "aux_distill/mean_u": 0.32321768464600764, "aux_distill/n_active_final_tok": 3.272727272727273, "aux_distill/n_active_tok": 18.90909090909091, "aux_distill/step_loss": 1.110493849624287, "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.203125, "calib/ece": 0.734, "calib/final_conf_rate": 0.0390625, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.4, "calib/mean_conf": 0.7340000000000001, "calib/mu_c": NaN, "calib/mu_w": 0.7340000000000001, "calib/nonempty_final_conf_rate": 0.0390625, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.734, "calib/std_conf": 0.32255852182200984, "calib/step_conf_rate": 0.04296875, "calib/step_q_w": 0.44249999999999995, "calib/step_q_w_n": 52.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1328125, "completions/max_length": 2930.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 616.7109375, "completions/mean_terminated_length": 711.1621704101562, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.013651098124682903, "learning_rate": 4.5e-06, "loss": 0.0664, "num_tokens": 4891490.0, "reward": 0.02233300730586052, "reward_std": 0.05675097182393074, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.013416014611721039, "rewards/format_reward_step": 0.03125, "step": 18 }, { "aux_distill/final_loss": 0.11861236074141093, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.13091924147946493, "aux_distill/mean_u": 0.4190444197773418, "aux_distill/n_active_final_tok": 4.428571428571429, "aux_distill/n_active_tok": 31.428571428571427, "aux_distill/step_loss": 1.1905800317014967, "calib/answer_extract_rate": 0.19140625, "calib/auroc": 0.7720588235294117, "calib/avg_num_step_conf": 0.859375, "calib/ece": 0.3972222222222222, "calib/final_conf_rate": 0.140625, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.2222222222222222, "calib/gap": 0.4311764705882354, "calib/mean_conf": 0.4527777777777778, "calib/mu_c": 0.8600000000000001, "calib/mu_w": 0.4288235294117647, "calib/nonempty_final_conf_rate": 0.140625, "calib/nonempty_reasoning_rate": 0.25, "calib/nonempty_step_conf_rate": 0.203125, "calib/pce": 0.3972222222222222, "calib/std_conf": 0.36656332718850393, "calib/step_conf_rate": 0.203125, "calib/step_q_c": 0.34777777777777774, "calib/step_q_c_n": 9.0, "calib/step_q_gap": -0.05961179568193792, "calib/step_q_w": 0.40738957345971566, "calib/step_q_w_n": 211.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 2726.0, "completions/max_terminated_length": 2726.0, "completions/mean_length": 527.7734375, "completions/mean_terminated_length": 582.3706665039062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.020266666666666665, "grad_norm": 0.020213766023516655, "learning_rate": 4.75e-06, "loss": 0.1487, "num_tokens": 5131360.0, "reward": 0.09431396424770355, "reward_std": 0.14942510426044464, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.0753466784954071, "rewards/format_reward_step": 0.1015625, "step": 19 }, { "aux_distill/final_loss": 0.1630369145423174, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.11918571154619086, "aux_distill/mean_u": 0.31566692087739423, "aux_distill/n_active_final_tok": 5.931034482758621, "aux_distill/n_active_tok": 47.310344827586206, "aux_distill/step_loss": 1.0288201899364078, "calib/answer_extract_rate": 0.265625, "calib/auroc": 0.4778012684989429, "calib/avg_num_step_conf": 1.33984375, "calib/ece": 0.4133395740740742, "calib/final_conf_rate": 0.2109375, "calib/format_rate": 0.1484375, "calib/frac_conf_gt_0.9": 0.2222222222222222, "calib/gap": -0.017851389006342477, "calib/mean_conf": 0.45148772222222217, "calib/mu_c": 0.4372727272727273, "calib/mu_w": 0.4551241162790698, "calib/nonempty_final_conf_rate": 0.2109375, "calib/nonempty_reasoning_rate": 0.3828125, "calib/nonempty_step_conf_rate": 0.27734375, "calib/pce": 0.33056179629629634, "calib/std_conf": 0.37462261055797835, "calib/step_conf_rate": 0.27734375, "calib/step_q_c": 0.3368372093023256, "calib/step_q_c_n": 43.0, "calib/step_q_gap": -0.06946942736434114, "calib/step_q_w": 0.4063066366666667, "calib/step_q_w_n": 300.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 3067.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 570.82421875, "completions/mean_terminated_length": 596.4530639648438, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.021333333333333333, "grad_norm": 0.015477465465664864, "learning_rate": 5e-06, "loss": 0.1807, "num_tokens": 5382363.0, "reward": 0.14076992869377136, "reward_std": 0.2674937844276428, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.09013360738754272, "rewards/format_reward_step": 0.1484375, "step": 20 }, { "aux_distill/final_loss": 0.07824607957154513, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.12015693659583727, "aux_distill/mean_u": 0.32890082250085595, "aux_distill/n_active_final_tok": 5.466666666666667, "aux_distill/n_active_tok": 37.46666666666667, "aux_distill/step_loss": 1.123323275645574, "calib/answer_extract_rate": 0.27734375, "calib/auroc": 0.6212765957446809, "calib/avg_num_step_conf": 1.109375, "calib/ece": 0.33134515062699055, "calib/final_conf_rate": 0.22265625, "calib/format_rate": 0.15625, "calib/frac_conf_gt_0.9": 0.15789473684210525, "calib/gap": 0.18378028905826505, "calib/mean_conf": 0.4314618669168691, "calib/mu_c": 0.583, "calib/mu_w": 0.3992197109417349, "calib/nonempty_final_conf_rate": 0.22265625, "calib/nonempty_reasoning_rate": 0.359375, "calib/nonempty_step_conf_rate": 0.2578125, "calib/pce": 0.29368421052631577, "calib/std_conf": 0.36305120477938346, "calib/step_conf_rate": 0.2578125, "calib/step_q_c": 0.6073076923076923, "calib/step_q_c_n": 26.0, "calib/step_q_gap": 0.2165480569679189, "calib/step_q_w": 0.3907596353397734, "calib/step_q_w_n": 258.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 551.85546875, "completions/mean_terminated_length": 601.170166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0224, "grad_norm": 0.0173453651368618, "learning_rate": 4.9722222222222224e-06, "loss": 0.2237, "num_tokens": 5626598.0, "reward": 0.15919816493988037, "reward_std": 0.28079676628112793, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.12308384478092194, "rewards/format_reward_step": 0.15625, "step": 21 }, { "aux_distill/final_loss": 0.10222903767134994, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.11608608160167933, "aux_distill/mean_u": 0.36473851604266366, "aux_distill/n_active_final_tok": 8.25, "aux_distill/n_active_tok": 48.75, "aux_distill/step_loss": 1.0586317628622055, "calib/answer_extract_rate": 0.34375, "calib/auroc": 0.34110169491525427, "calib/avg_num_step_conf": 1.53125, "calib/ece": 0.3980276056338028, "calib/final_conf_rate": 0.27734375, "calib/format_rate": 0.25, "calib/frac_conf_gt_0.9": 0.19718309859154928, "calib/gap": -0.1942820338983051, "calib/mean_conf": 0.40644563380281684, "calib/mu_c": 0.24499999999999997, "calib/mu_w": 0.43928203389830506, "calib/nonempty_final_conf_rate": 0.27734375, "calib/nonempty_reasoning_rate": 0.45703125, "calib/nonempty_step_conf_rate": 0.37890625, "calib/pce": 0.31772957746478875, "calib/std_conf": 0.3810141867483041, "calib/step_conf_rate": 0.37890625, "calib/step_q_c": 0.31976744186046513, "calib/step_q_c_n": 43.0, "calib/step_q_gap": -0.012243663891437495, "calib/step_q_w": 0.3320111057519026, "calib/step_q_w_n": 349.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2913.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 469.73828125, "completions/mean_terminated_length": 488.83331298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.023466666666666667, "grad_norm": 0.019904376938939095, "learning_rate": 4.944444444444445e-06, "loss": 0.248, "num_tokens": 5848667.0, "reward": 0.2241542935371399, "reward_std": 0.32099008560180664, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.1475273072719574, "rewards/format_reward_step": 0.25, "step": 22 }, { "aux_distill/final_loss": 0.08589777469751425, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.1131642828695476, "aux_distill/mean_u": 0.27366238856607294, "aux_distill/n_active_final_tok": 8.75, "aux_distill/n_active_tok": 49.375, "aux_distill/step_loss": 1.0457450337707996, "calib/answer_extract_rate": 0.3984375, "calib/auroc": 0.6170634920634921, "calib/avg_num_step_conf": 1.57421875, "calib/ece": 0.30312093023255815, "calib/final_conf_rate": 0.3359375, "calib/format_rate": 0.2578125, "calib/frac_conf_gt_0.9": 0.13953488372093023, "calib/gap": 0.15406706349206356, "calib/mean_conf": 0.3867279069767442, "calib/mu_c": 0.5157142857142858, "calib/mu_w": 0.36164722222222223, "calib/nonempty_final_conf_rate": 0.3359375, "calib/nonempty_reasoning_rate": 0.52734375, "calib/nonempty_step_conf_rate": 0.43359375, "calib/pce": 0.26352906976744184, "calib/std_conf": 0.3571084463815083, "calib/step_conf_rate": 0.43359375, "calib/step_q_c": 0.6026829268292683, "calib/step_q_c_n": 41.0, "calib/step_q_gap": 0.27729279912459975, "calib/step_q_w": 0.3253901277046685, "calib/step_q_w_n": 362.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 456.12890625, "completions/mean_terminated_length": 467.0760192871094, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.017549779266119003, "learning_rate": 4.9166666666666665e-06, "loss": 0.2146, "num_tokens": 6069372.0, "reward": 0.2545512318611145, "reward_std": 0.3819565176963806, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.188789963722229, "rewards/format_reward_step": 0.2578125, "step": 23 }, { "aux_distill/final_loss": 0.11669028896722011, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.11643350729718804, "aux_distill/mean_u": 0.39236320772460315, "aux_distill/n_active_final_tok": 12.5, "aux_distill/n_active_tok": 78.375, "aux_distill/step_loss": 1.0476447604596615, "calib/answer_extract_rate": 0.5234375, "calib/auroc": 0.4999999999999999, "calib/avg_num_step_conf": 2.45703125, "calib/ece": 0.23891773109243697, "calib/final_conf_rate": 0.46484375, "calib/format_rate": 0.3671875, "calib/frac_conf_gt_0.9": 0.04201680672268908, "calib/gap": 0.010720370101596544, "calib/mean_conf": 0.2658353781512605, "calib/mu_c": 0.2753846153846154, "calib/mu_w": 0.26466424528301885, "calib/nonempty_final_conf_rate": 0.46484375, "calib/nonempty_reasoning_rate": 0.6328125, "calib/nonempty_step_conf_rate": 0.54296875, "calib/pce": 0.19775470588235294, "calib/std_conf": 0.2908977995352265, "calib/step_conf_rate": 0.54296875, "calib/step_q_c": 0.26138888888888884, "calib/step_q_c_n": 36.0, "calib/step_q_gap": -0.011842957529105247, "calib/step_q_w": 0.2732318464179941, "calib/step_q_w_n": 593.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 501.94140625, "completions/mean_terminated_length": 507.893310546875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0256, "grad_norm": 0.01694255881011486, "learning_rate": 4.888888888888889e-06, "loss": 0.3453, "num_tokens": 6302381.0, "reward": 0.35615280270576477, "reward_std": 0.4103126525878906, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.2943369150161743, "rewards/format_reward_step": 0.3671875, "step": 24 }, { "aux_distill/final_loss": 0.1430833032936789, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.12002312182448804, "aux_distill/mean_u": 0.3420518475664192, "aux_distill/n_active_final_tok": 16.125, "aux_distill/n_active_tok": 83.25, "aux_distill/step_loss": 1.0571478921920061, "calib/answer_extract_rate": 0.63671875, "calib/auroc": 0.6062956204379562, "calib/avg_num_step_conf": 2.62109375, "calib/ece": 0.27120449312750566, "calib/final_conf_rate": 0.59765625, "calib/format_rate": 0.44140625, "calib/frac_conf_gt_0.9": 0.08496732026143791, "calib/gap": 0.11980306241964694, "calib/mean_conf": 0.36130036240855146, "calib/mu_c": 0.4685750000000001, "calib/mu_w": 0.34877193758035313, "calib/nonempty_final_conf_rate": 0.59765625, "calib/nonempty_reasoning_rate": 0.8046875, "calib/nonempty_step_conf_rate": 0.69140625, "calib/pce": 0.26396484606868215, "calib/std_conf": 0.3285230922100727, "calib/step_conf_rate": 0.69140625, "calib/step_q_c": 0.4666734693877551, "calib/step_q_c_n": 49.0, "calib/step_q_gap": 0.11133586206086299, "calib/step_q_w": 0.3553376073268921, "calib/step_q_w_n": 621.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 412.25, "completions/mean_terminated_length": 418.7936706542969, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.02666666666666667, "grad_norm": 0.01768523082137108, "learning_rate": 4.861111111111111e-06, "loss": 0.3526, "num_tokens": 6511141.0, "reward": 0.4240191578865051, "reward_std": 0.41996586322784424, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.33631956577301025, "rewards/format_reward_step": 0.44140625, "step": 25 }, { "aux_distill/final_loss": 0.11389542344841175, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.1183220089878887, "aux_distill/mean_u": 0.4198702327702227, "aux_distill/n_active_final_tok": 17.75, "aux_distill/n_active_tok": 92.75, "aux_distill/step_loss": 1.06932464055717, "calib/answer_extract_rate": 0.67578125, "calib/auroc": 0.3595157657657657, "calib/avg_num_step_conf": 2.8984375, "calib/ece": 0.3143400050782022, "calib/final_conf_rate": 0.625, "calib/format_rate": 0.51953125, "calib/frac_conf_gt_0.9": 0.06875, "calib/gap": -0.13188307598433094, "calib/mean_conf": 0.3295220213282022, "calib/mu_c": 0.2075301760426961, "calib/mu_w": 0.33941325202702705, "calib/nonempty_final_conf_rate": 0.625, "calib/nonempty_reasoning_rate": 0.78515625, "calib/nonempty_step_conf_rate": 0.671875, "calib/pce": 0.2844310132032022, "calib/std_conf": 0.29963368167290444, "calib/step_conf_rate": 0.671875, "calib/step_q_c": 0.33064194704822947, "calib/step_q_c_n": 31.0, "calib/step_q_gap": 0.008411026387188658, "calib/step_q_w": 0.3222309206610408, "calib/step_q_w_n": 711.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3060.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 409.63671875, "completions/mean_terminated_length": 411.2431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 14.0, "epoch": 0.027733333333333332, "grad_norm": 0.01667754165828228, "learning_rate": 4.833333333333333e-06, "loss": 0.31, "num_tokens": 6721248.0, "reward": 0.4849138557910919, "reward_std": 0.43144482374191284, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.4034214913845062, "rewards/format_reward_step": 0.51953125, "step": 26 }, { "aux_distill/final_loss": 0.09505854613962583, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.11087433947250247, "aux_distill/mean_u": 0.3520416102787173, "aux_distill/n_active_final_tok": 21.375, "aux_distill/n_active_tok": 112.0, "aux_distill/step_loss": 1.013684831559658, "calib/answer_extract_rate": 0.7890625, "calib/auroc": 0.6753246753246753, "calib/avg_num_step_conf": 3.5, "calib/ece": 0.2809422702281864, "calib/final_conf_rate": 0.7265625, "calib/format_rate": 0.64453125, "calib/frac_conf_gt_0.9": 0.04838709677419355, "calib/gap": 0.16678135850032755, "calib/mean_conf": 0.3400820551744229, "calib/mu_c": 0.49699999999999994, "calib/mu_w": 0.3302186414996724, "calib/nonempty_final_conf_rate": 0.7265625, "calib/nonempty_reasoning_rate": 0.91796875, "calib/nonempty_step_conf_rate": 0.83984375, "calib/pce": 0.2809422702281864, "calib/std_conf": 0.2850531043616158, "calib/step_conf_rate": 0.83984375, "calib/step_q_c": 0.5752857142857143, "calib/step_q_c_n": 49.0, "calib/step_q_gap": 0.2177253253635057, "calib/step_q_w": 0.3575603889222086, "calib/step_q_w_n": 847.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2435.0, "completions/max_terminated_length": 2435.0, "completions/mean_length": 346.09765625, "completions/mean_terminated_length": 348.8228454589844, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.0288, "grad_norm": 0.015667244791984558, "learning_rate": 4.805555555555556e-06, "loss": 0.2393, "num_tokens": 6915065.0, "reward": 0.6040114164352417, "reward_std": 0.4578133821487427, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.5205228328704834, "rewards/format_reward_step": 0.64453125, "step": 27 }, { "aux_distill/final_loss": 0.07474714342970401, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10493989638052881, "aux_distill/mean_u": 0.354241252103711, "aux_distill/n_active_final_tok": 20.75, "aux_distill/n_active_tok": 110.75, "aux_distill/step_loss": 0.9746518153697252, "calib/answer_extract_rate": 0.7578125, "calib/auroc": 0.37758051197357556, "calib/avg_num_step_conf": 3.47265625, "calib/ece": 0.2812024404896149, "calib/final_conf_rate": 0.73046875, "calib/format_rate": 0.62109375, "calib/frac_conf_gt_0.9": 0.0427807486631016, "calib/gap": -0.12303030936396864, "calib/mean_conf": 0.3104126718098503, "calib/mu_c": 0.19659318774585793, "calib/mu_w": 0.31962349710982657, "calib/nonempty_final_conf_rate": 0.73046875, "calib/nonempty_reasoning_rate": 0.88671875, "calib/nonempty_step_conf_rate": 0.796875, "calib/pce": 0.2583744010695187, "calib/std_conf": 0.27782210737333857, "calib/step_conf_rate": 0.796875, "calib/step_q_c": 0.3388571428571428, "calib/step_q_c_n": 35.0, "calib/step_q_gap": 0.012772158115358812, "calib/step_q_w": 0.326084984741784, "calib/step_q_w_n": 852.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 339.8828125, "completions/mean_terminated_length": 343.9130554199219, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.029866666666666666, "grad_norm": 0.013735142536461353, "learning_rate": 4.777777777777778e-06, "loss": 0.1881, "num_tokens": 7109019.0, "reward": 0.5857105851173401, "reward_std": 0.42725497484207153, "rewards/accuracy_reward_step": 0.0546875, "rewards/final_brier_reward_step": 0.4956399202346802, "rewards/format_reward_step": 0.62109375, "step": 28 }, { "aux_distill/final_loss": 0.06856484201853164, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10507733467966318, "aux_distill/mean_u": 0.36215323672527155, "aux_distill/n_active_final_tok": 24.75, "aux_distill/n_active_tok": 109.5, "aux_distill/step_loss": 0.9822084847837687, "calib/answer_extract_rate": 0.84765625, "calib/auroc": 0.5148529411764706, "calib/avg_num_step_conf": 3.421875, "calib/ece": 0.3110308512700327, "calib/final_conf_rate": 0.84765625, "calib/format_rate": 0.75390625, "calib/frac_conf_gt_0.9": 0.06912442396313365, "calib/gap": 0.010225580228504727, "calib/mean_conf": 0.3835107962111911, "calib/mu_c": 0.39293529411764705, "calib/mu_w": 0.3827097138891423, "calib/nonempty_final_conf_rate": 0.84765625, "calib/nonempty_reasoning_rate": 0.9296875, "calib/nonempty_step_conf_rate": 0.875, "calib/pce": 0.3081003168281695, "calib/std_conf": 0.2824604470729387, "calib/step_conf_rate": 0.875, "calib/step_q_c": 0.39023787878787886, "calib/step_q_c_n": 66.0, "calib/step_q_gap": 0.0162489130015665, "calib/step_q_w": 0.37398896578631236, "calib/step_q_w_n": 810.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2550.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 292.71484375, "completions/mean_terminated_length": 295.0196838378906, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.030933333333333334, "grad_norm": 0.011677009984850883, "learning_rate": 4.75e-06, "loss": 0.1245, "num_tokens": 7291082.0, "reward": 0.698754072189331, "reward_std": 0.38540327548980713, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.5771956443786621, "rewards/format_reward_step": 0.75390625, "step": 29 }, { "aux_distill/final_loss": 0.07209987913665827, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10355257894843817, "aux_distill/mean_u": 0.29610125990414, "aux_distill/n_active_final_tok": 26.625, "aux_distill/n_active_tok": 116.375, "aux_distill/step_loss": 0.9634258858859539, "calib/answer_extract_rate": 0.87890625, "calib/auroc": 0.4930756843800322, "calib/avg_num_step_conf": 3.63671875, "calib/ece": 0.30701373725483133, "calib/final_conf_rate": 0.8671875, "calib/format_rate": 0.7890625, "calib/frac_conf_gt_0.9": 0.05855855855855856, "calib/gap": -0.026933573287790114, "calib/mean_conf": 0.37244707058816473, "calib/mu_c": 0.3473333333333333, "calib/mu_w": 0.37426690662112344, "calib/nonempty_final_conf_rate": 0.8671875, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.92578125, "calib/pce": 0.30594662013771423, "calib/std_conf": 0.2842563330638594, "calib/step_conf_rate": 0.92578125, "calib/step_q_c": 0.40701492537313433, "calib/step_q_c_n": 67.0, "calib/step_q_gap": 0.019904566958119818, "calib/step_q_w": 0.3871103584150145, "calib/step_q_w_n": 864.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2519.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 284.31640625, "completions/mean_terminated_length": 284.31640625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.032, "grad_norm": 0.011391903273761272, "learning_rate": 4.722222222222222e-06, "loss": 0.134, "num_tokens": 7470851.0, "reward": 0.7274598479270935, "reward_std": 0.38493478298187256, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.599450945854187, "rewards/format_reward_step": 0.7890625, "step": 30 }, { "aux_distill/final_loss": 0.026639884941687342, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10087639489211142, "aux_distill/mean_u": 0.34303952978799784, "aux_distill/n_active_final_tok": 27.125, "aux_distill/n_active_tok": 116.375, "aux_distill/step_loss": 0.9821240454912186, "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.5539274322169059, "calib/avg_num_step_conf": 3.63671875, "calib/ece": 0.25141960898565074, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.03862660944206009, "calib/gap": 0.05544985377835743, "calib/mean_conf": 0.32780338580968515, "calib/mu_c": 0.37754166666666666, "calib/mu_w": 0.32209181288830924, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.23810935147492115, "calib/std_conf": 0.278105933595617, "calib/step_conf_rate": 0.91796875, "calib/step_q_c": 0.4104555555555555, "calib/step_q_c_n": 90.0, "calib/step_q_gap": 0.03663538551988377, "calib/step_q_w": 0.37382017003567175, "calib/step_q_w_n": 841.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1813.0, "completions/max_terminated_length": 1813.0, "completions/mean_length": 271.5078125, "completions/mean_terminated_length": 271.5078125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.03306666666666667, "grad_norm": 0.010902057401835918, "learning_rate": 4.694444444444445e-06, "loss": 0.2134, "num_tokens": 7646269.0, "reward": 0.7762458324432373, "reward_std": 0.36868053674697876, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.6423354148864746, "rewards/format_reward_step": 0.81640625, "step": 31 }, { "aux_distill/final_loss": 0.06401024729711935, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10776355746202171, "aux_distill/mean_u": 0.34849096609468133, "aux_distill/n_active_final_tok": 28.75, "aux_distill/n_active_tok": 115.75, "aux_distill/step_loss": 1.0136253219097853, "calib/answer_extract_rate": 0.9140625, "calib/auroc": 0.4738944630248978, "calib/avg_num_step_conf": 3.6171875, "calib/ece": 0.24195064377682404, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.86328125, "calib/frac_conf_gt_0.9": 0.02575107296137339, "calib/gap": -0.011549424005945663, "calib/mean_conf": 0.3294914163090129, "calib/mu_c": 0.31923076923076926, "calib/mu_w": 0.3307801932367149, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.22992703862660946, "calib/std_conf": 0.23962689939454776, "calib/step_conf_rate": 0.953125, "calib/step_q_c": 0.39330275229357803, "calib/step_q_c_n": 109.0, "calib/step_q_gap": 0.034800549111203505, "calib/step_q_w": 0.3585022031823745, "calib/step_q_w_n": 817.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1978.0, "completions/max_terminated_length": 1978.0, "completions/mean_length": 258.890625, "completions/mean_terminated_length": 258.890625, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.034133333333333335, "grad_norm": 0.011250043287873268, "learning_rate": 4.666666666666667e-06, "loss": 0.108, "num_tokens": 7819249.0, "reward": 0.8262053728103638, "reward_std": 0.3190339207649231, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.6875669956207275, "rewards/format_reward_step": 0.86328125, "step": 32 }, { "aux_distill/final_loss": 0.06823475078999763, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10778754344210029, "aux_distill/mean_u": 0.36732323440413506, "aux_distill/n_active_final_tok": 29.25, "aux_distill/n_active_tok": 115.375, "aux_distill/step_loss": 1.0096406731754541, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5757847533632288, "calib/avg_num_step_conf": 3.60546875, "calib/ece": 0.2554921504225731, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.04938271604938271, "calib/gap": 0.06669241007764448, "calib/mean_conf": 0.3377966771715444, "calib/mu_c": 0.39899999999999997, "calib/mu_w": 0.3323075899223555, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.2554921504225731, "calib/std_conf": 0.25518815943970735, "calib/step_conf_rate": 0.94921875, "calib/step_q_c": 0.32671875000000006, "calib/step_q_c_n": 64.0, "calib/step_q_gap": -0.03693933213351014, "calib/step_q_w": 0.3636580821335102, "calib/step_q_w_n": 859.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2101.0, "completions/max_terminated_length": 2101.0, "completions/mean_length": 240.44921875, "completions/mean_terminated_length": 241.3921661376953, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.0352, "grad_norm": 0.01088921632617712, "learning_rate": 4.638888888888889e-06, "loss": 0.1041, "num_tokens": 7987676.0, "reward": 0.8405758142471313, "reward_std": 0.2909170389175415, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.7124015688896179, "rewards/format_reward_step": 0.890625, "step": 33 }, { "aux_distill/final_loss": 0.060225540088140406, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10242805699817836, "aux_distill/mean_u": 0.3273371979523068, "aux_distill/n_active_final_tok": 29.375, "aux_distill/n_active_tok": 128.875, "aux_distill/step_loss": 0.9640550166368484, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.40304528891202496, "calib/avg_num_step_conf": 4.03125, "calib/ece": 0.26849306686419755, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.02880658436213992, "calib/gap": -0.07045224862467475, "calib/mean_conf": 0.31905290225514404, "calib/mu_c": 0.25352941176470584, "calib/mu_w": 0.3239816603893806, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.258793560691358, "calib/std_conf": 0.2436208075519449, "calib/step_conf_rate": 0.96875, "calib/step_q_c": 0.3027536231884059, "calib/step_q_c_n": 69.0, "calib/step_q_gap": -0.04627410266829196, "calib/step_q_w": 0.34902772585669783, "calib/step_q_w_n": 963.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1460.0, "completions/max_terminated_length": 1460.0, "completions/mean_length": 233.73046875, "completions/mean_terminated_length": 234.64707946777344, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.03626666666666667, "grad_norm": 0.010752441361546516, "learning_rate": 4.611111111111112e-06, "loss": 0.0846, "num_tokens": 8152623.0, "reward": 0.8435355424880981, "reward_std": 0.292891263961792, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.7222274541854858, "rewards/format_reward_step": 0.8984375, "step": 34 }, { "aux_distill/final_loss": 0.06452472699311329, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10324789304286242, "aux_distill/mean_u": 0.3524373133250392, "aux_distill/n_active_final_tok": 29.875, "aux_distill/n_active_tok": 130.0, "aux_distill/step_loss": 0.9679541885852814, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5807174887892377, "calib/avg_num_step_conf": 4.0625, "calib/ece": 0.25050661157024795, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.028925619834710745, "calib/gap": 0.06446320509794667, "calib/mean_conf": 0.32901900826446284, "calib/mu_c": 0.388421052631579, "calib/mu_w": 0.3239578475336323, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.25050661157024795, "calib/std_conf": 0.24719545477582386, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.418375, "calib/step_q_c_n": 80.0, "calib/step_q_gap": 0.033822291666666615, "calib/step_q_w": 0.3845527083333334, "calib/step_q_w_n": 960.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 263.8046875, "completions/mean_terminated_length": 263.8046875, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "epoch": 0.037333333333333336, "grad_norm": 0.010321549139916897, "learning_rate": 4.583333333333333e-06, "loss": 0.2327, "num_tokens": 8329413.0, "reward": 0.8620861768722534, "reward_std": 0.25374510884284973, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.7397972345352173, "rewards/format_reward_step": 0.90625, "step": 35 }, { "aux_distill/final_loss": 0.04324473983433563, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10079836356453598, "aux_distill/mean_u": 0.34983613467249147, "aux_distill/n_active_final_tok": 30.25, "aux_distill/n_active_tok": 138.75, "aux_distill/step_loss": 0.9647388868033886, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5186206120612061, "calib/avg_num_step_conf": 4.3359375, "calib/ece": 0.17529593495934961, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.016260162601626018, "calib/gap": 0.006814041404140436, "calib/mean_conf": 0.3203138211382114, "calib/mu_c": 0.3259090909090909, "calib/mu_w": 0.3190950495049505, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.15837398373983744, "calib/std_conf": 0.226129813903, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.3540217391304348, "calib/step_q_c_n": 184.0, "calib/step_q_gap": -0.0035948915391116887, "calib/step_q_w": 0.35761663066954646, "calib/step_q_w_n": 926.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2130.0, "completions/max_terminated_length": 2130.0, "completions/mean_length": 237.359375, "completions/mean_terminated_length": 237.359375, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 0.0384, "grad_norm": 0.009128491394221783, "learning_rate": 4.555555555555556e-06, "loss": 0.1158, "num_tokens": 8492889.0, "reward": 0.9270957708358765, "reward_std": 0.23009908199310303, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.7409102916717529, "rewards/format_reward_step": 0.9375, "step": 36 }, { "aux_distill/final_loss": 0.02983783091622172, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09856392722576857, "aux_distill/mean_u": 0.31691360948698644, "aux_distill/n_active_final_tok": 30.375, "aux_distill/n_active_tok": 140.375, "aux_distill/step_loss": 0.9558014236390591, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4738962294553658, "calib/avg_num_step_conf": 4.38671875, "calib/ece": 0.1954847086831275, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.00411522633744856, "calib/gap": -0.012209304236222962, "calib/mean_conf": 0.27109705436213993, "calib/mu_c": 0.2603448275862069, "calib/mu_w": 0.2725541318224299, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1736200996296296, "calib/std_conf": 0.20206661622017189, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2971818181818182, "calib/step_q_c_n": 110.0, "calib/step_q_gap": -0.02671931644799419, "calib/step_q_w": 0.3239011346298124, "calib/step_q_w_n": 1013.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1811.0, "completions/max_terminated_length": 1811.0, "completions/mean_length": 257.17578125, "completions/mean_terminated_length": 258.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.039466666666666664, "grad_norm": 0.008870480582118034, "learning_rate": 4.527777777777778e-06, "loss": 0.2145, "num_tokens": 8665822.0, "reward": 0.9225687980651855, "reward_std": 0.20187771320343018, "rewards/accuracy_reward_step": 0.1171875, "rewards/final_brier_reward_step": 0.7826376557350159, "rewards/format_reward_step": 0.9453125, "step": 37 }, { "aux_distill/final_loss": 0.04951605253154412, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10669704480096698, "aux_distill/mean_u": 0.39243347865395956, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 144.875, "aux_distill/step_loss": 1.017454382032156, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5411618741789519, "calib/avg_num_step_conf": 4.546875, "calib/ece": 0.20768253968253966, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": 0.04232024521967592, "calib/mean_conf": 0.3030793650793651, "calib/mu_c": 0.34019354838709676, "calib/mu_w": 0.29787330316742083, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19387301587301584, "calib/std_conf": 0.22660456443288846, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.40094296296296295, "calib/step_q_c_n": 135.0, "calib/step_q_gap": 0.07968931864809414, "calib/step_q_w": 0.3212536443148688, "calib/step_q_w_n": 1029.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 651.0, "completions/max_terminated_length": 651.0, "completions/mean_length": 222.73828125, "completions/mean_terminated_length": 223.6117706298828, "completions/min_length": 0.0, "completions/min_terminated_length": 18.0, "epoch": 0.04053333333333333, "grad_norm": 0.009885660372674465, "learning_rate": 4.5e-06, "loss": 0.0605, "num_tokens": 8829731.0, "reward": 0.9365085959434509, "reward_std": 0.1839677095413208, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.7870796918869019, "rewards/format_reward_step": 0.96484375, "step": 38 }, { "aux_distill/final_loss": 0.03649313731875736, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10016206814907491, "aux_distill/mean_u": 0.34258266360839884, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 149.125, "aux_distill/step_loss": 0.9651275295764208, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5206266729648874, "calib/avg_num_step_conf": 4.66015625, "calib/ece": 0.19338709677419355, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.020161290322580645, "calib/gap": 0.0005133049913399468, "calib/mean_conf": 0.3078225806451613, "calib/mu_c": 0.30827586206896557, "calib/mu_w": 0.3077625570776256, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19213709677419355, "calib/std_conf": 0.23030856049765402, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3291964285714286, "calib/step_q_c_n": 112.0, "calib/step_q_gap": -0.0034354102140208287, "calib/step_q_w": 0.33263183878544944, "calib/step_q_w_n": 1081.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 256.28125, "completions/mean_terminated_length": 256.28125, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "epoch": 0.0416, "grad_norm": 0.008934065699577332, "learning_rate": 4.472222222222223e-06, "loss": 0.1541, "num_tokens": 9001427.0, "reward": 0.9320827722549438, "reward_std": 0.1839485764503479, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.7821344137191772, "rewards/format_reward_step": 0.96875, "step": 39 }, { "aux_distill/final_loss": 0.05308996573148761, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09999302681535482, "aux_distill/mean_u": 0.341460604743016, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 167.375, "aux_distill/step_loss": 0.9468402825295925, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5058177646673222, "calib/avg_num_step_conf": 5.3203125, "calib/ece": 0.17167667984189722, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.0030394296951818944, "calib/mean_conf": 0.2783960474308301, "calib/mu_c": 0.2811111111111111, "calib/mu_w": 0.2780716814159292, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.17167667984189722, "calib/std_conf": 0.186278735810632, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3487096774193548, "calib/step_q_c_n": 155.0, "calib/step_q_gap": 0.04356941229922223, "calib/step_q_w": 0.3051402651201326, "calib/step_q_w_n": 1207.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 680.0, "completions/max_terminated_length": 680.0, "completions/mean_length": 246.3046875, "completions/mean_terminated_length": 247.27059936523438, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.042666666666666665, "grad_norm": 0.010064591653645039, "learning_rate": 4.444444444444444e-06, "loss": 0.0759, "num_tokens": 9171241.0, "reward": 0.9558616280555725, "reward_std": 0.13117021322250366, "rewards/accuracy_reward_step": 0.10546875, "rewards/final_brier_reward_step": 0.825785756111145, "rewards/format_reward_step": 0.98046875, "step": 40 }, { "aux_distill/final_loss": 0.03107749327318743, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.10013743420131505, "aux_distill/mean_u": 0.3639978660274057, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 158.625, "aux_distill/step_loss": 0.9702968336641788, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5538297872340426, "calib/avg_num_step_conf": 4.95703125, "calib/ece": 0.1259919028340081, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03293723404255322, "calib/mean_conf": 0.27311740890688263, "calib/mu_c": 0.29978723404255325, "calib/mu_w": 0.26685000000000003, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.10441295546558704, "calib/std_conf": 0.1901639990614291, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3252631578947368, "calib/step_q_c_n": 209.0, "calib/step_q_gap": 0.038083912611717896, "calib/step_q_w": 0.2871792452830189, "calib/step_q_w_n": 1060.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2412.0, "completions/max_terminated_length": 2412.0, "completions/mean_length": 248.796875, "completions/mean_terminated_length": 248.796875, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.04373333333333333, "grad_norm": 0.01017068326473236, "learning_rate": 4.416666666666667e-06, "loss": 0.0853, "num_tokens": 9342181.0, "reward": 0.9481226205825806, "reward_std": 0.20646090805530548, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.7673390507698059, "rewards/format_reward_step": 0.94140625, "step": 41 }, { "aux_distill/final_loss": 0.04891497686912771, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09970072889700532, "aux_distill/mean_u": 0.35922230188739757, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 167.125, "aux_distill/step_loss": 0.948092307895422, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4345588235294117, "calib/avg_num_step_conf": 5.2265625, "calib/ece": 0.1948976377952756, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.04478288770053476, "calib/mean_conf": 0.2570236220472441, "calib/mu_c": 0.21823529411764706, "calib/mu_w": 0.2630181818181818, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15903149606299213, "calib/std_conf": 0.1976297127939186, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26745562130177514, "calib/step_q_c_n": 169.0, "calib/step_q_gap": -0.02169065756905464, "calib/step_q_w": 0.2891462788708298, "calib/step_q_w_n": 1169.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 520.0, "completions/max_terminated_length": 520.0, "completions/mean_length": 238.9140625, "completions/mean_terminated_length": 239.8509979248047, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.0448, "grad_norm": 0.009412148036062717, "learning_rate": 4.388888888888889e-06, "loss": 0.1176, "num_tokens": 9507711.0, "reward": 0.9523229598999023, "reward_std": 0.1509455144405365, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.7991771697998047, "rewards/format_reward_step": 0.97265625, "step": 42 }, { "aux_distill/final_loss": 0.017450424864364322, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09527178690768778, "aux_distill/mean_u": 0.3074926680926342, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 165.5, "aux_distill/step_loss": 0.9352674260735512, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5001218323586744, "calib/avg_num_step_conf": 5.171875, "calib/ece": 0.1387244094488189, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.021141325536062405, "calib/mean_conf": 0.2671889763779528, "calib/mu_c": 0.24921052631578944, "calib/mu_w": 0.27035185185185184, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1281535433070866, "calib/std_conf": 0.19103756205181283, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3148258706467662, "calib/step_q_c_n": 201.0, "calib/step_q_gap": 0.01793148062005201, "calib/step_q_w": 0.2968943900267142, "calib/step_q_w_n": 1123.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1804.0, "completions/max_terminated_length": 1804.0, "completions/mean_length": 258.19140625, "completions/mean_terminated_length": 258.19140625, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.04586666666666667, "grad_norm": 0.009506667964160442, "learning_rate": 4.361111111111112e-06, "loss": 0.0957, "num_tokens": 9679032.0, "reward": 0.9611801505088806, "reward_std": 0.1372612565755844, "rewards/accuracy_reward_step": 0.1484375, "rewards/final_brier_reward_step": 0.7973603010177612, "rewards/format_reward_step": 0.9765625, "step": 43 }, { "aux_distill/final_loss": 0.016983458794129547, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09559705457650125, "aux_distill/mean_u": 0.3491452177791532, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 182.0, "aux_distill/step_loss": 0.9389870651066303, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.596326164874552, "calib/avg_num_step_conf": 5.75, "calib/ece": 0.12504150197628458, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.06651459293394774, "calib/mean_conf": 0.24211660079051384, "calib/mu_c": 0.29916666666666664, "calib/mu_w": 0.2326520737327189, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11243280632411067, "calib/std_conf": 0.17807459255864547, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30972527472527467, "calib/step_q_c_n": 182.0, "calib/step_q_gap": 0.050196127438452975, "calib/step_q_w": 0.2595291472868217, "calib/step_q_w_n": 1290.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 667.0, "completions/max_terminated_length": 667.0, "completions/mean_length": 264.6640625, "completions/mean_terminated_length": 265.70196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.046933333333333334, "grad_norm": 0.008401011116802692, "learning_rate": 4.333333333333334e-06, "loss": 0.0687, "num_tokens": 9853106.0, "reward": 0.9753538370132446, "reward_std": 0.13524678349494934, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8335201740264893, "rewards/format_reward_step": 0.9765625, "step": 44 }, { "aux_distill/final_loss": 0.02873411323525943, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09477901551872492, "aux_distill/mean_u": 0.2913159320204437, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 185.5, "aux_distill/step_loss": 0.9190560318529606, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5002170138888888, "calib/avg_num_step_conf": 5.796875, "calib/ece": 0.15745322580645163, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.009127777777777762, "calib/mean_conf": 0.25295, "calib/mu_c": 0.24500000000000002, "calib/mu_w": 0.2541277777777778, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14068548387096777, "calib/std_conf": 0.18730663513211732, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.22559006211180124, "calib/step_q_c_n": 161.0, "calib/step_q_gap": -0.04106481317164548, "calib/step_q_w": 0.2666548752834467, "calib/step_q_w_n": 1323.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2756.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 295.80078125, "completions/mean_terminated_length": 295.80078125, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "epoch": 0.048, "grad_norm": 0.007688302546739578, "learning_rate": 4.305555555555556e-06, "loss": 0.1917, "num_tokens": 10033879.0, "reward": 0.9475110769271851, "reward_std": 0.16948533058166504, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.8051784038543701, "rewards/format_reward_step": 0.96484375, "step": 45 }, { "aux_distill/final_loss": 0.027575008094572695, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09906627563759685, "aux_distill/mean_u": 0.3723311350278894, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 193.75, "aux_distill/step_loss": 0.9630877319723368, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49044551798174985, "calib/avg_num_step_conf": 6.0703125, "calib/ece": 0.18154761904761907, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.0033913043478261684, "calib/mean_conf": 0.24765873015873016, "calib/mu_c": 0.2504444444444445, "calib/mu_w": 0.24705314009661836, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12531746031746033, "calib/std_conf": 0.20316658512824695, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.278125, "calib/step_q_c_n": 224.0, "calib/step_q_gap": 0.01937590225563912, "calib/step_q_w": 0.2587490977443609, "calib/step_q_w_n": 1330.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2639.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 285.0, "completions/mean_terminated_length": 285.0, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.04906666666666667, "grad_norm": 0.007769672200083733, "learning_rate": 4.277777777777778e-06, "loss": 0.1511, "num_tokens": 10211607.0, "reward": 0.9743492007255554, "reward_std": 0.13994555175304413, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.7924484610557556, "rewards/format_reward_step": 0.98046875, "step": 46 }, { "aux_distill/final_loss": 0.02417274876643205, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0960873260628432, "aux_distill/mean_u": 0.362197381845286, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 196.5, "aux_distill/step_loss": 0.9367004819214344, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4843272171253823, "calib/avg_num_step_conf": 6.16796875, "calib/ece": 0.1505511811023622, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.008888888888888863, "calib/mean_conf": 0.21874015748031497, "calib/mu_c": 0.21111111111111114, "calib/mu_w": 0.22, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11377952755905511, "calib/std_conf": 0.18822415906750153, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21770408163265306, "calib/step_q_c_n": 196.0, "calib/step_q_gap": -0.02792903478094058, "calib/step_q_w": 0.24563311641359364, "calib/step_q_w_n": 1383.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 771.0, "completions/max_terminated_length": 771.0, "completions/mean_length": 264.59375, "completions/mean_terminated_length": 265.6313781738281, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.050133333333333335, "grad_norm": 0.007656062953174114, "learning_rate": 4.25e-06, "loss": 0.034, "num_tokens": 10385319.0, "reward": 0.9708345532417297, "reward_std": 0.11946520209312439, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8205753564834595, "rewards/format_reward_step": 0.98046875, "step": 47 }, { "aux_distill/final_loss": 0.020226882541464875, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09678884199820459, "aux_distill/mean_u": 0.3264913128481655, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 181.875, "aux_distill/step_loss": 0.9476615190505981, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.502124987926205, "calib/avg_num_step_conf": 5.68359375, "calib/ece": 0.12513385826771653, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.0021980102385781575, "calib/mean_conf": 0.20195275590551184, "calib/mu_c": 0.20019607843137258, "calib/mu_w": 0.20239408866995073, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0631496062992126, "calib/std_conf": 0.16469964828726347, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25000000000000006, "calib/step_q_c_n": 266.0, "calib/step_q_gap": 0.02378216989066448, "calib/step_q_w": 0.22621783010933558, "calib/step_q_w_n": 1189.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1571.0, "completions/max_terminated_length": 1571.0, "completions/mean_length": 264.6953125, "completions/mean_terminated_length": 265.73333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.0512, "grad_norm": 0.008458969183266163, "learning_rate": 4.222222222222223e-06, "loss": 0.0648, "num_tokens": 10556769.0, "reward": 0.9897904992103577, "reward_std": 0.12231529504060745, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.7959872484207153, "rewards/format_reward_step": 0.98046875, "step": 48 }, { "aux_distill/final_loss": 0.009723340248456225, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09664465487003326, "aux_distill/mean_u": 0.360260341416223, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 192.875, "aux_distill/step_loss": 0.9567231871187687, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5453703703703704, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.10180708661417325, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.03398203703703706, "calib/mean_conf": 0.20527952755905515, "calib/mu_c": 0.23203703703703707, "calib/mu_w": 0.198055, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04724409448818899, "calib/std_conf": 0.1555700864099637, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25839344262295083, "calib/step_q_c_n": 305.0, "calib/step_q_gap": 0.019905558939590534, "calib/step_q_w": 0.2384878836833603, "calib/step_q_w_n": 1238.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1855.0, "completions/max_terminated_length": 1855.0, "completions/mean_length": 278.89453125, "completions/mean_terminated_length": 278.89453125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.05226666666666667, "grad_norm": 0.007638960611075163, "learning_rate": 4.194444444444445e-06, "loss": 0.1125, "num_tokens": 10732702.0, "reward": 1.004418134689331, "reward_std": 0.10782338678836823, "rewards/accuracy_reward_step": 0.2109375, "rewards/final_brier_reward_step": 0.8096175193786621, "rewards/format_reward_step": 0.98828125, "step": 49 }, { "aux_distill/final_loss": 0.03108279372099787, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09588481183163822, "aux_distill/mean_u": 0.34428673420801703, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 212.5, "aux_distill/step_loss": 0.9277653079479933, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5084745762711865, "calib/avg_num_step_conf": 6.734375, "calib/ece": 0.123709765625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": -0.007617904155553662, "calib/mean_conf": 0.191680859375, "calib/mu_c": 0.18581864406779658, "calib/mu_w": 0.19343654822335024, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.042460937500000004, "calib/std_conf": 0.1701023200373057, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22982303206997085, "calib/step_q_c_n": 343.0, "calib/step_q_gap": 0.013334255820875995, "calib/step_q_w": 0.21648877624909485, "calib/step_q_w_n": 1381.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 311.10546875, "completions/mean_terminated_length": 312.32550048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.05333333333333334, "grad_norm": 0.006806428078562021, "learning_rate": 4.166666666666667e-06, "loss": 0.0811, "num_tokens": 10917705.0, "reward": 1.0072999000549316, "reward_std": 0.08516330271959305, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7880373001098633, "rewards/format_reward_step": 0.99609375, "step": 50 }, { "aux_distill/final_loss": 0.019811689671769273, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09419922926463187, "aux_distill/mean_u": 0.2904671369685525, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 207.375, "aux_distill/step_loss": 0.9221805911511183, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5173215531271798, "calib/avg_num_step_conf": 6.546875, "calib/ece": 0.14816406249999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020904440827714504, "calib/mean_conf": 0.17777343750000002, "calib/mu_c": 0.19304347826086957, "calib/mu_w": 0.17213903743315506, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.028203125000000002, "calib/std_conf": 0.15118323499791106, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21153452685421997, "calib/step_q_c_n": 391.0, "calib/step_q_gap": 0.0007765502005234737, "calib/step_q_w": 0.2107579766536965, "calib/step_q_w_n": 1285.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 752.0, "completions/max_terminated_length": 752.0, "completions/mean_length": 294.32421875, "completions/mean_terminated_length": 295.47845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.0544, "grad_norm": 0.0073488312773406506, "learning_rate": 4.138888888888889e-06, "loss": 0.0965, "num_tokens": 11102348.0, "reward": 1.020928144454956, "reward_std": 0.0940789133310318, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7762312293052673, "rewards/format_reward_step": 0.99609375, "step": 51 }, { "aux_distill/final_loss": 0.007637668693860178, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09585435572080314, "aux_distill/mean_u": 0.33941107293235234, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 189.125, "aux_distill/step_loss": 0.950905866920948, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4956369982547993, "calib/avg_num_step_conf": 5.9375, "calib/ece": 0.14287007874015747, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009514418681957876, "calib/mean_conf": 0.19697244094488192, "calib/mu_c": 0.2041269841269841, "calib/mu_w": 0.19461256544502623, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04590551181102362, "calib/std_conf": 0.1671904568053981, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2206233766233766, "calib/step_q_c_n": 385.0, "calib/step_q_gap": -0.009763407517592582, "calib/step_q_w": 0.2303867841409692, "calib/step_q_w_n": 1135.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 778.0, "completions/max_terminated_length": 778.0, "completions/mean_length": 289.40625, "completions/mean_terminated_length": 290.54119873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.055466666666666664, "grad_norm": 0.007487446069717407, "learning_rate": 4.111111111111111e-06, "loss": 0.1067, "num_tokens": 11284388.0, "reward": 1.0093071460723877, "reward_std": 0.11890524625778198, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.7803332209587097, "rewards/format_reward_step": 0.9921875, "step": 52 }, { "aux_distill/final_loss": 0.014197576827427838, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09383068815805018, "aux_distill/mean_u": 0.34173695455792147, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 218.875, "aux_distill/step_loss": 0.9241092819720507, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5298270254287404, "calib/avg_num_step_conf": 6.83984375, "calib/ece": 0.15933070866141733, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011014192785334137, "calib/mean_conf": 0.20807086614173226, "calib/mu_c": 0.21578947368421056, "calib/mu_w": 0.20477528089887642, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03409448818897638, "calib/std_conf": 0.1730180765715237, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23542208067940548, "calib/step_q_c_n": 471.0, "calib/step_q_gap": -0.009796669320594503, "calib/step_q_w": 0.24521874999999999, "calib/step_q_w_n": 1280.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2315.0, "completions/max_terminated_length": 2315.0, "completions/mean_length": 337.1328125, "completions/mean_terminated_length": 337.1328125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.05653333333333333, "grad_norm": 0.006950410548597574, "learning_rate": 4.083333333333334e-06, "loss": 0.1347, "num_tokens": 11476518.0, "reward": 1.0199216604232788, "reward_std": 0.12385044991970062, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7507808804512024, "rewards/format_reward_step": 0.9921875, "step": 53 }, { "aux_distill/final_loss": 0.010009945388446795, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09145716740749776, "aux_distill/mean_u": 0.2808475809495889, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 203.625, "aux_distill/step_loss": 0.9045617207884789, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5724786931818182, "calib/avg_num_step_conf": 6.4453125, "calib/ece": 0.15970703125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.036664772727272726, "calib/mean_conf": 0.19935546875, "calib/mu_c": 0.2245625, "calib/mu_w": 0.18789772727272727, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.023281249999999993, "calib/std_conf": 0.1821295325056257, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2658883826879271, "calib/step_q_c_n": 439.0, "calib/step_q_gap": 0.03788532736175035, "calib/step_q_w": 0.22800305532617673, "calib/step_q_w_n": 1211.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1219.0, "completions/max_terminated_length": 1219.0, "completions/mean_length": 301.80859375, "completions/mean_terminated_length": 302.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.0576, "grad_norm": 0.006956387776881456, "learning_rate": 4.055555555555556e-06, "loss": 0.0934, "num_tokens": 11660013.0, "reward": 1.0273517370224, "reward_std": 0.12011194974184036, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.750015914440155, "rewards/format_reward_step": 0.9921875, "step": 54 }, { "aux_distill/final_loss": 0.0013773050231975503, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09118994430173188, "aux_distill/mean_u": 0.3320995970261154, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 210.625, "aux_distill/step_loss": 0.9105221219360828, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4829365079365079, "calib/avg_num_step_conf": 6.609375, "calib/ece": 0.16142449799196787, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.012048192771084338, "calib/gap": -0.01906243386243392, "calib/mean_conf": 0.20363574297188755, "calib/mu_c": 0.18916666666666662, "calib/mu_w": 0.20822910052910054, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06204819277108434, "calib/std_conf": 0.19055744321386933, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.22871794871794873, "calib/step_q_c_n": 351.0, "calib/step_q_gap": -0.018766540469225046, "calib/step_q_w": 0.24748448918717378, "calib/step_q_w_n": 1341.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2219.0, "completions/max_terminated_length": 2219.0, "completions/mean_length": 335.859375, "completions/mean_terminated_length": 338.5039367675781, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.058666666666666666, "grad_norm": 0.006341911386698484, "learning_rate": 4.027777777777779e-06, "loss": 0.1049, "num_tokens": 11853817.0, "reward": 0.97525954246521, "reward_std": 0.16055387258529663, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.7473939657211304, "rewards/format_reward_step": 0.96875, "step": 55 }, { "aux_distill/final_loss": 0.009648349008784862, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09182535042054951, "aux_distill/mean_u": 0.3306901748601806, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 215.375, "aux_distill/step_loss": 0.9086051415652037, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4698883161512028, "calib/avg_num_step_conf": 6.73046875, "calib/ece": 0.19055511811023623, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": -0.04547766323024058, "calib/mean_conf": 0.2159015748031496, "calib/mu_c": 0.18116666666666664, "calib/mu_w": 0.22664432989690722, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08511811023622048, "calib/std_conf": 0.2136231738159996, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.22120760368663592, "calib/step_q_c_n": 434.0, "calib/step_q_gap": -0.0426764149324487, "calib/step_q_w": 0.2638840186190846, "calib/step_q_w_n": 1289.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2405.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 350.765625, "completions/mean_terminated_length": 350.765625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.05973333333333333, "grad_norm": 0.006642908789217472, "learning_rate": 4.000000000000001e-06, "loss": 0.1088, "num_tokens": 12050453.0, "reward": 0.9888845682144165, "reward_std": 0.11521496623754501, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.7512066960334778, "rewards/format_reward_step": 0.9921875, "step": 56 }, { "aux_distill/final_loss": 0.012068207034644729, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09138673334382474, "aux_distill/mean_u": 0.29421892223544, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 231.0, "aux_distill/step_loss": 0.9017991088330746, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5083401416122004, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.2613253012048193, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0024999999999999745, "calib/mean_conf": 0.18903614457831325, "calib/mu_c": 0.18750000000000003, "calib/mu_w": 0.19, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.032409638554216864, "calib/std_conf": 0.18268379285388484, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24536891679748823, "calib/step_q_c_n": 637.0, "calib/step_q_gap": 0.017687398645673008, "calib/step_q_w": 0.22768151815181523, "calib/step_q_w_n": 1212.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2433.0, "completions/max_terminated_length": 2433.0, "completions/mean_length": 396.7109375, "completions/mean_terminated_length": 398.2666931152344, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.0608, "grad_norm": 0.005428283475339413, "learning_rate": 3.972222222222223e-06, "loss": 0.1853, "num_tokens": 12258803.0, "reward": 1.01131272315979, "reward_std": 0.14329111576080322, "rewards/accuracy_reward_step": 0.37890625, "rewards/final_brier_reward_step": 0.6710629463195801, "rewards/format_reward_step": 0.97265625, "step": 57 }, { "aux_distill/final_loss": 0.016176513805476134, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08761006209533662, "aux_distill/mean_u": 0.2756479289251673, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 245.0, "aux_distill/step_loss": 0.8599240900948644, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5305960154026452, "calib/avg_num_step_conf": 7.6796875, "calib/ece": 0.21080240485829954, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.012145748987854251, "calib/gap": -0.0014380976058931683, "calib/mean_conf": 0.20757816194331982, "calib/mu_c": 0.20652433333333334, "calib/mu_w": 0.2079624309392265, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07558704453441295, "calib/std_conf": 0.20527577065297345, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2406767027835052, "calib/step_q_c_n": 388.0, "calib/step_q_gap": 0.030452875153593906, "calib/step_q_w": 0.2102238276299113, "calib/step_q_w_n": 1578.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 427.3359375, "completions/mean_terminated_length": 427.3359375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.06186666666666667, "grad_norm": 0.005393090657889843, "learning_rate": 3.944444444444445e-06, "loss": 0.1755, "num_tokens": 12474521.0, "reward": 0.9693734049797058, "reward_std": 0.1678314357995987, "rewards/accuracy_reward_step": 0.2578125, "rewards/final_brier_reward_step": 0.7239030599594116, "rewards/format_reward_step": 0.95703125, "step": 58 }, { "aux_distill/final_loss": 0.0203451911911543, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08644241420552135, "aux_distill/mean_u": 0.2678265314548987, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 233.5, "aux_distill/step_loss": 0.8440789300948381, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4464579336804604, "calib/avg_num_step_conf": 7.296875, "calib/ece": 0.24825454545454542, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.025314613592765156, "calib/mean_conf": 0.22107351778656129, "calib/mu_c": 0.20466404494382023, "calib/mu_w": 0.2299786585365854, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05877470355731226, "calib/std_conf": 0.21842382462612686, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24388932384341636, "calib/step_q_c_n": 562.0, "calib/step_q_gap": -0.0174102167385132, "calib/step_q_w": 0.26129954058192956, "calib/step_q_w_n": 1306.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2208.0, "completions/max_terminated_length": 2208.0, "completions/mean_length": 384.203125, "completions/mean_terminated_length": 384.203125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.06293333333333333, "grad_norm": 0.005627437960356474, "learning_rate": 3.916666666666667e-06, "loss": 0.1036, "num_tokens": 12679125.0, "reward": 1.0117087364196777, "reward_std": 0.13074266910552979, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6874798536300659, "rewards/format_reward_step": 0.98828125, "step": 59 }, { "aux_distill/final_loss": 0.009482828684667766, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08887755987234414, "aux_distill/mean_u": 0.2723864582715786, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 244.25, "aux_distill/step_loss": 0.8792927600443363, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49565323366915826, "calib/avg_num_step_conf": 7.6328125, "calib/ece": 0.19774698795180726, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": -0.0018043548911277352, "calib/mean_conf": 0.19807630522088351, "calib/mu_c": 0.19676470588235295, "calib/mu_w": 0.1985690607734807, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.061365461847389564, "calib/std_conf": 0.2058185689731123, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21266968325791855, "calib/step_q_c_n": 442.0, "calib/step_q_gap": -0.02202555483731955, "calib/step_q_w": 0.2346952380952381, "calib/step_q_w_n": 1512.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2729.0, "completions/max_terminated_length": 2729.0, "completions/mean_length": 414.85546875, "completions/mean_terminated_length": 414.85546875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.064, "grad_norm": 0.005547628737986088, "learning_rate": 3.88888888888889e-06, "loss": 0.164, "num_tokens": 12894184.0, "reward": 0.9815865755081177, "reward_std": 0.1615404486656189, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7287981510162354, "rewards/format_reward_step": 0.96875, "step": 60 }, { "aux_distill/final_loss": 0.006618549310587696, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08587677648756653, "aux_distill/mean_u": 0.2584981882242442, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 268.75, "aux_distill/step_loss": 0.8521491996943951, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5252466251298027, "calib/avg_num_step_conf": 8.3984375, "calib/ece": 0.28567928286852584, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": 0.014015316718587745, "calib/mean_conf": 0.20611354581673305, "calib/mu_c": 0.21415420560747664, "calib/mu_w": 0.2001388888888889, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.032749003984063746, "calib/std_conf": 0.2252031310944807, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25237496653279784, "calib/step_q_c_n": 747.0, "calib/step_q_gap": 0.029025002170716574, "calib/step_q_w": 0.22334996436208127, "calib/step_q_w_n": 1403.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2498.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 396.87890625, "completions/mean_terminated_length": 396.87890625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.06506666666666666, "grad_norm": 0.005713924765586853, "learning_rate": 3.861111111111112e-06, "loss": 0.2191, "num_tokens": 13099849.0, "reward": 1.0205470323562622, "reward_std": 0.1633109450340271, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6465628147125244, "rewards/format_reward_step": 0.9765625, "step": 61 }, { "aux_distill/final_loss": 0.0006032689170751837, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08355497196316719, "aux_distill/mean_u": 0.25818921676101536, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 283.75, "aux_distill/step_loss": 0.834946446120739, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5345317725752509, "calib/avg_num_step_conf": 8.8671875, "calib/ece": 0.18156626506024098, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": 0.01583444816053514, "calib/mean_conf": 0.21706827309236948, "calib/mu_c": 0.22876923076923075, "calib/mu_w": 0.2129347826086956, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06879518072289155, "calib/std_conf": 0.22198954206126015, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22276859504132232, "calib/step_q_c_n": 484.0, "calib/step_q_gap": -0.005437284018028177, "calib/step_q_w": 0.2282058790593505, "calib/step_q_w_n": 1786.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2936.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 481.65625, "completions/mean_terminated_length": 481.65625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.06613333333333334, "grad_norm": 0.004894188605248928, "learning_rate": 3.833333333333334e-06, "loss": 0.146, "num_tokens": 13330233.0, "reward": 0.9838611483573914, "reward_std": 0.15775221586227417, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7411597967147827, "rewards/format_reward_step": 0.97265625, "step": 62 }, { "aux_distill/final_loss": 0.010994663068231603, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08830803050659597, "aux_distill/mean_u": 0.3099173921582291, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 263.875, "aux_distill/step_loss": 0.8720856215804815, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4667717086834734, "calib/avg_num_step_conf": 8.24609375, "calib/ece": 0.24200355731225295, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.013549173669467784, "calib/mean_conf": 0.19372766798418972, "calib/mu_c": 0.18473058823529412, "calib/mu_w": 0.1982797619047619, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04988142292490119, "calib/std_conf": 0.201006340197766, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.21841632653061224, "calib/step_q_c_n": 735.0, "calib/step_q_gap": -0.03451608626008543, "calib/step_q_w": 0.25293241279069767, "calib/step_q_w_n": 1376.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2087.0, "completions/max_terminated_length": 2087.0, "completions/mean_length": 444.6328125, "completions/mean_terminated_length": 444.6328125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.0672, "grad_norm": 0.004928493872284889, "learning_rate": 3.8055555555555556e-06, "loss": 0.1096, "num_tokens": 13552699.0, "reward": 1.0073766708374023, "reward_std": 0.13989222049713135, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.698347270488739, "rewards/format_reward_step": 0.984375, "step": 63 }, { "aux_distill/final_loss": 0.004576153917696502, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.082359075313434, "aux_distill/mean_u": 0.2690879073024952, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 274.25, "aux_distill/step_loss": 0.8190145855769515, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5289093767867352, "calib/avg_num_step_conf": 8.84765625, "calib/ece": 0.2756566801619434, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": 0.004396119210977656, "calib/mean_conf": 0.1775417004048583, "calib/mu_c": 0.1803715909090909, "calib/mu_w": 0.17597547169811326, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04846153846153846, "calib/std_conf": 0.2040467381214395, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2388135520684736, "calib/step_q_c_n": 701.0, "calib/step_q_gap": 0.047575828283307364, "calib/step_q_w": 0.19123772378516624, "calib/step_q_w_n": 1564.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2459.0, "completions/max_terminated_length": 2459.0, "completions/mean_length": 446.28125, "completions/mean_terminated_length": 449.7952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.06826666666666667, "grad_norm": 0.004880514927208424, "learning_rate": 3.777777777777778e-06, "loss": 0.1574, "num_tokens": 13770723.0, "reward": 0.9896012544631958, "reward_std": 0.16695758700370789, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6745150089263916, "rewards/format_reward_step": 0.9609375, "step": 64 }, { "aux_distill/final_loss": 0.0014026463968548342, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.080779759446159, "aux_distill/mean_u": 0.2522538711397831, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 242.75, "aux_distill/step_loss": 0.8063949355855584, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5213543412671806, "calib/avg_num_step_conf": 7.5859375, "calib/ece": 0.2562698412698412, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": 0.016745558162923252, "calib/mean_conf": 0.18777777777777777, "calib/mu_c": 0.1982105263157895, "calib/mu_w": 0.18146496815286625, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.033531746031746024, "calib/std_conf": 0.22130166471981214, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24587326120556413, "calib/step_q_c_n": 647.0, "calib/step_q_gap": 0.014157431089734024, "calib/step_q_w": 0.2317158301158301, "calib/step_q_w_n": 1295.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 405.52734375, "completions/mean_terminated_length": 405.52734375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.06933333333333333, "grad_norm": 0.005717034917324781, "learning_rate": 3.7500000000000005e-06, "loss": 0.1686, "num_tokens": 13979562.0, "reward": 1.0164703130722046, "reward_std": 0.14506025612354279, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6774718761444092, "rewards/format_reward_step": 0.984375, "step": 65 }, { "aux_distill/final_loss": 0.0007709396513746469, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0867828888585791, "aux_distill/mean_u": 0.29476347266499914, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 282.75, "aux_distill/step_loss": 0.8670579399913549, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5325065366405202, "calib/avg_num_step_conf": 8.8359375, "calib/ece": 0.2633064516129032, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.009518761924952324, "calib/mean_conf": 0.17266129032258065, "calib/mu_c": 0.17876404494382026, "calib/mu_w": 0.16924528301886793, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03854838709677419, "calib/std_conf": 0.2014368101950606, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22776296296296297, "calib/step_q_c_n": 675.0, "calib/step_q_gap": 0.025058237065042344, "calib/step_q_w": 0.20270472589792063, "calib/step_q_w_n": 1587.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2444.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 491.45703125, "completions/mean_terminated_length": 493.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0704, "grad_norm": 0.0049970392137765884, "learning_rate": 3.7222222222222225e-06, "loss": 0.1521, "num_tokens": 14211727.0, "reward": 0.9928978681564331, "reward_std": 0.15571537613868713, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6732957363128662, "rewards/format_reward_step": 0.96484375, "step": 66 }, { "aux_distill/final_loss": 0.0029924702848802553, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08490731683559716, "aux_distill/mean_u": 0.26169637778988236, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 246.75, "aux_distill/step_loss": 0.8460806831717491, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5222349643221204, "calib/avg_num_step_conf": 7.78125, "calib/ece": 0.32205217391304347, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.013271527777777764, "calib/mean_conf": 0.16474624505928856, "calib/mu_c": 0.1723, "calib/mu_w": 0.15902847222222224, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.027984189723320153, "calib/std_conf": 0.19943857176791385, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21303046272493575, "calib/step_q_c_n": 778.0, "calib/step_q_gap": -0.002558334639149923, "calib/step_q_w": 0.21558879736408568, "calib/step_q_w_n": 1214.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1549.0, "completions/max_terminated_length": 1549.0, "completions/mean_length": 433.0234375, "completions/mean_terminated_length": 436.4330749511719, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.07146666666666666, "grad_norm": 0.004873435944318771, "learning_rate": 3.694444444444445e-06, "loss": 0.073, "num_tokens": 14427589.0, "reward": 1.0285768508911133, "reward_std": 0.12726490199565887, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6430913209915161, "rewards/format_reward_step": 0.98828125, "step": 67 }, { "aux_distill/final_loss": 0.0005568949854932725, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08274573809467256, "aux_distill/mean_u": 0.24946699349708568, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 256.875, "aux_distill/step_loss": 0.8269004672765732, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4968096419709323, "calib/avg_num_step_conf": 8.03125, "calib/ece": 0.2826939024390244, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.016260162601626018, "calib/gap": -0.011313314427507937, "calib/mean_conf": 0.1962491869918699, "calib/mu_c": 0.18912087912087913, "calib/mu_w": 0.20043419354838707, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05451219512195122, "calib/std_conf": 0.22594439756723764, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24029900332225917, "calib/step_q_c_n": 602.0, "calib/step_q_gap": -0.0018046417946596949, "calib/step_q_w": 0.24210364511691887, "calib/step_q_w_n": 1454.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 471.109375, "completions/mean_terminated_length": 474.81890869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.07253333333333334, "grad_norm": 0.005671203602105379, "learning_rate": 3.6666666666666666e-06, "loss": 0.1928, "num_tokens": 14652281.0, "reward": 0.9734482765197754, "reward_std": 0.19877000153064728, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6422090530395508, "rewards/format_reward_step": 0.94921875, "step": 68 }, { "aux_distill/final_loss": 0.004735883499961346, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08559095812961459, "aux_distill/mean_u": 0.2745780916439154, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 238.375, "aux_distill/step_loss": 0.8511736784130335, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4968984321745058, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.276798418972332, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01020518064076345, "calib/mean_conf": 0.16913043478260872, "calib/mu_c": 0.16255555555555556, "calib/mu_w": 0.17276073619631901, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.045098814229249, "calib/std_conf": 0.2054668711999103, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.24599970544919, "calib/step_q_c_n": 679.0, "calib/step_q_gap": 0.03817218101922257, "calib/step_q_w": 0.20782752442996744, "calib/step_q_w_n": 1228.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2207.0, "completions/max_terminated_length": 2207.0, "completions/mean_length": 478.5234375, "completions/mean_terminated_length": 478.5234375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.0736, "grad_norm": 0.005036453250795603, "learning_rate": 3.638888888888889e-06, "loss": 0.1474, "num_tokens": 14879279.0, "reward": 1.0080058574676514, "reward_std": 0.13084906339645386, "rewards/accuracy_reward_step": 0.3515625, "rewards/final_brier_reward_step": 0.6800742149353027, "rewards/format_reward_step": 0.984375, "step": 69 }, { "aux_distill/final_loss": 0.01110040802086587, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08515842608176172, "aux_distill/mean_u": 0.26320584469456093, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 256.625, "aux_distill/step_loss": 0.8404838293790817, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5273792613636363, "calib/avg_num_step_conf": 8.12109375, "calib/ece": 0.27547540322580644, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.024193548387096774, "calib/gap": -0.006413636363636371, "calib/mean_conf": 0.17791169354838712, "calib/mu_c": 0.17377386363636363, "calib/mu_w": 0.1801875, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0492741935483871, "calib/std_conf": 0.22329706161560695, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22325165794066318, "calib/step_q_c_n": 573.0, "calib/step_q_gap": 0.023962082907462656, "calib/step_q_w": 0.19928957503320052, "calib/step_q_w_n": 1506.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2565.0, "completions/max_terminated_length": 2565.0, "completions/mean_length": 477.8359375, "completions/mean_terminated_length": 479.7098388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.07466666666666667, "grad_norm": 0.004834037274122238, "learning_rate": 3.6111111111111115e-06, "loss": 0.1402, "num_tokens": 15108597.0, "reward": 0.9890013337135315, "reward_std": 0.16583868861198425, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6655027270317078, "rewards/format_reward_step": 0.96875, "step": 70 }, { "aux_distill/final_loss": 0.0004364773476481787, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08168565214145929, "aux_distill/mean_u": 0.27507697667444936, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 259.25, "aux_distill/step_loss": 0.8164200261235237, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4487418831168831, "calib/avg_num_step_conf": 8.265625, "calib/ece": 0.21979674796747972, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.012195121951219513, "calib/gap": -0.01907954545454546, "calib/mean_conf": 0.21565040650406503, "calib/mu_c": 0.20199999999999999, "calib/mu_w": 0.22107954545454545, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07544715447154472, "calib/std_conf": 0.2400139387463105, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2538742924528302, "calib/step_q_c_n": 424.0, "calib/step_q_gap": 0.004038063138409431, "calib/step_q_w": 0.2498362293144208, "calib/step_q_w_n": 1692.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2724.0, "completions/max_terminated_length": 2724.0, "completions/mean_length": 495.70703125, "completions/mean_terminated_length": 497.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.07573333333333333, "grad_norm": 0.0045654685236513615, "learning_rate": 3.5833333333333335e-06, "loss": 0.1787, "num_tokens": 15339906.0, "reward": 0.9661494493484497, "reward_std": 0.18799448013305664, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.6979237794876099, "rewards/format_reward_step": 0.9609375, "step": 71 }, { "aux_distill/final_loss": 0.0003922245759895304, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08633854042273015, "aux_distill/mean_u": 0.2690554787303164, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 246.25, "aux_distill/step_loss": 0.8629931565374136, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5078947368421053, "calib/avg_num_step_conf": 7.6953125, "calib/ece": 0.2662845849802371, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.0025096602265156087, "calib/mean_conf": 0.15885375494071147, "calib/mu_c": 0.1604210526315789, "calib/mu_w": 0.1579113924050633, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.024822134387351785, "calib/std_conf": 0.1917175086378693, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.224953125, "calib/step_q_c_n": 640.0, "calib/step_q_gap": 0.02508094454887219, "calib/step_q_w": 0.1998721804511278, "calib/step_q_w_n": 1330.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2916.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 451.59375, "completions/mean_terminated_length": 451.59375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.0768, "grad_norm": 0.004893281031399965, "learning_rate": 3.555555555555556e-06, "loss": 0.1686, "num_tokens": 15559922.0, "reward": 1.0133074522018433, "reward_std": 0.13455332815647125, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6711460947990417, "rewards/format_reward_step": 0.984375, "step": 72 }, { "aux_distill/final_loss": 0.0012096594082322554, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08101288764737546, "aux_distill/mean_u": 0.23852064830184302, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 241.125, "aux_distill/step_loss": 0.8089192043989897, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4784493833504625, "calib/avg_num_step_conf": 7.53515625, "calib/ece": 0.3429362549800796, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": -0.004278198869475863, "calib/mean_conf": 0.15578884462151396, "calib/mu_c": 0.15341964285714285, "calib/mu_w": 0.15769784172661871, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.026254980079681273, "calib/std_conf": 0.1906614384154937, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20033562585969739, "calib/step_q_c_n": 727.0, "calib/step_q_gap": 0.0030925310177672416, "calib/step_q_w": 0.19724309484193014, "calib/step_q_w_n": 1202.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 455.16796875, "completions/mean_terminated_length": 455.16796875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.07786666666666667, "grad_norm": 0.005076461471617222, "learning_rate": 3.5277777777777784e-06, "loss": 0.1768, "num_tokens": 15783477.0, "reward": 1.013967752456665, "reward_std": 0.1508485972881317, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6138730049133301, "rewards/format_reward_step": 0.9765625, "step": 73 }, { "aux_distill/final_loss": 0.00911986950541177, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08642115711700171, "aux_distill/mean_u": 0.24410496762555028, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 221.5, "aux_distill/step_loss": 0.8550916835665703, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5061310330077707, "calib/avg_num_step_conf": 7.00390625, "calib/ece": 0.24390873015873016, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": -0.004719826049761178, "calib/mean_conf": 0.1889484126984127, "calib/mu_c": 0.1857831325301205, "calib/mu_w": 0.19050295857988167, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05174603174603174, "calib/std_conf": 0.21468508784246035, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2299671052631579, "calib/step_q_c_n": 532.0, "calib/step_q_gap": -0.005350103301473369, "calib/step_q_w": 0.23531720856463126, "calib/step_q_w_n": 1261.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2217.0, "completions/max_terminated_length": 2217.0, "completions/mean_length": 418.3984375, "completions/mean_terminated_length": 420.03924560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.07893333333333333, "grad_norm": 0.005768840666860342, "learning_rate": 3.5e-06, "loss": 0.1246, "num_tokens": 15994515.0, "reward": 1.0006341934204102, "reward_std": 0.14704427123069763, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.6965809464454651, "rewards/format_reward_step": 0.98046875, "step": 74 }, { "aux_distill/final_loss": 0.0003913615232704615, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0784449273487553, "aux_distill/mean_u": 0.23628307227973086, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 210.75, "aux_distill/step_loss": 0.7840579003095627, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48269230769230775, "calib/avg_num_step_conf": 6.5859375, "calib/ece": 0.3793307086614173, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": -0.0010769230769230587, "calib/mean_conf": 0.1744488188976378, "calib/mu_c": 0.17392307692307693, "calib/mu_w": 0.175, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020984251968503936, "calib/std_conf": 0.20148926657727723, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.20788372093023252, "calib/step_q_c_n": 860.0, "calib/step_q_gap": 0.00592246185032938, "calib/step_q_w": 0.20196125907990314, "calib/step_q_w_n": 826.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 402.38671875, "completions/mean_terminated_length": 403.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.08, "grad_norm": 0.005615293513983488, "learning_rate": 3.4722222222222224e-06, "loss": 0.078, "num_tokens": 16202278.0, "reward": 1.0420210361480713, "reward_std": 0.1387816220521927, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5879480838775635, "rewards/format_reward_step": 0.98828125, "step": 75 }, { "aux_distill/final_loss": 0.0003459962840679509, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0877249448094517, "aux_distill/mean_u": 0.2889035125026195, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 214.125, "aux_distill/step_loss": 0.8769034389406443, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5242957746478873, "calib/avg_num_step_conf": 6.69140625, "calib/ece": 0.3186734126984127, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.031746031746031744, "calib/gap": 0.006201702944942322, "calib/mean_conf": 0.19045357142857144, "calib/mu_c": 0.1939481818181818, "calib/mu_w": 0.18774647887323948, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03630952380952381, "calib/std_conf": 0.2311935786750537, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2421694152923538, "calib/step_q_c_n": 667.0, "calib/step_q_gap": 0.030811862711091847, "calib/step_q_w": 0.21135755258126196, "calib/step_q_w_n": 1046.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 412.6015625, "completions/mean_terminated_length": 414.2196350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.08106666666666666, "grad_norm": 0.005647731013596058, "learning_rate": 3.444444444444445e-06, "loss": 0.0926, "num_tokens": 16410960.0, "reward": 1.0235514640808105, "reward_std": 0.15799517929553986, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6330406069755554, "rewards/format_reward_step": 0.984375, "step": 76 }, { "aux_distill/final_loss": 0.00035550804295780836, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0853228303603828, "aux_distill/mean_u": 0.2870328522394571, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 233.875, "aux_distill/step_loss": 0.8528727777302265, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4457294224059956, "calib/avg_num_step_conf": 7.30859375, "calib/ece": 0.3397609561752986, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": -0.03664749967696085, "calib/mean_conf": 0.1509163346613546, "calib/mu_c": 0.1301834862385321, "calib/mu_w": 0.16683098591549295, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.028207171314741035, "calib/std_conf": 0.1957179982341441, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.19174825174825175, "calib/step_q_c_n": 715.0, "calib/step_q_gap": -0.0004057274905025776, "calib/step_q_w": 0.19215397923875432, "calib/step_q_w_n": 1156.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3069.0, "completions/max_terminated_length": 3069.0, "completions/mean_length": 425.59375, "completions/mean_terminated_length": 427.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.08213333333333334, "grad_norm": 0.005499564111232758, "learning_rate": 3.416666666666667e-06, "loss": 0.1261, "num_tokens": 16624576.0, "reward": 1.0059542655944824, "reward_std": 0.13684764504432678, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6056585907936096, "rewards/format_reward_step": 0.98046875, "step": 77 }, { "aux_distill/final_loss": 0.02040222806544989, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08757964731194079, "aux_distill/mean_u": 0.2869950557474653, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 225.125, "aux_distill/step_loss": 0.8553942292928696, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.551591942820013, "calib/avg_num_step_conf": 7.03515625, "calib/ece": 0.3247389558232932, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.028787524366471695, "calib/mean_conf": 0.16755020080321284, "calib/mu_c": 0.1831578947368421, "calib/mu_w": 0.1543703703703704, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0172289156626506, "calib/std_conf": 0.21556506134494627, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.194529262086514, "calib/step_q_c_n": 786.0, "calib/step_q_gap": -0.01558482658343674, "calib/step_q_w": 0.21011408866995074, "calib/step_q_w_n": 1015.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2819.0, "completions/max_terminated_length": 2819.0, "completions/mean_length": 457.21484375, "completions/mean_terminated_length": 462.6363830566406, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.0832, "grad_norm": 0.005055857822299004, "learning_rate": 3.3888888888888893e-06, "loss": 0.1238, "num_tokens": 16849647.0, "reward": 1.0199202299118042, "reward_std": 0.17445480823516846, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.617965579032898, "rewards/format_reward_step": 0.97265625, "step": 78 }, { "aux_distill/final_loss": 0.00029105614953550685, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08373665565159172, "aux_distill/mean_u": 0.2616512076200522, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 208.75, "aux_distill/step_loss": 0.8370754849165678, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5389074900793651, "calib/avg_num_step_conf": 6.53515625, "calib/ece": 0.32423359375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01953125, "calib/gap": 0.02861765873015873, "calib/mean_conf": 0.16881328125, "calib/mu_c": 0.1849107142857143, "calib/mu_w": 0.15629305555555556, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.027773437499999998, "calib/std_conf": 0.22332490340277414, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.21726239193083574, "calib/step_q_c_n": 694.0, "calib/step_q_gap": 0.012743801532470067, "calib/step_q_w": 0.20451859039836567, "calib/step_q_w_n": 979.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 994.0, "completions/max_terminated_length": 994.0, "completions/mean_length": 442.2734375, "completions/mean_terminated_length": 444.00787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.08426666666666667, "grad_norm": 0.0052482676692306995, "learning_rate": 3.3611111111111117e-06, "loss": 0.0962, "num_tokens": 17069245.0, "reward": 1.0397592782974243, "reward_std": 0.11951573193073273, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6459249258041382, "rewards/format_reward_step": 0.99609375, "step": 79 }, { "aux_distill/final_loss": 0.00031070784007170005, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0862661061109975, "aux_distill/mean_u": 0.28112182156062415, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 223.25, "aux_distill/step_loss": 0.8623503372073174, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.42532508127031765, "calib/avg_num_step_conf": 6.98046875, "calib/ece": 0.40391304347826096, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.02922355588897224, "calib/mean_conf": 0.14106719367588932, "calib/mu_c": 0.12674418604651164, "calib/mu_w": 0.15596774193548388, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.017549407114624504, "calib/std_conf": 0.18393386697878347, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.17260024301336574, "calib/step_q_c_n": 823.0, "calib/step_q_gap": -0.031564694745970345, "calib/step_q_w": 0.20416493775933608, "calib/step_q_w_n": 964.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2339.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 425.28515625, "completions/mean_terminated_length": 426.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.08533333333333333, "grad_norm": 0.005360201466828585, "learning_rate": 3.3333333333333333e-06, "loss": 0.1185, "num_tokens": 17280278.0, "reward": 1.0216944217681885, "reward_std": 0.1372138261795044, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5551074743270874, "rewards/format_reward_step": 0.984375, "step": 80 }, { "aux_distill/final_loss": 0.0003905783214577241, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08434331661555916, "aux_distill/mean_u": 0.2819454667285919, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 222.625, "aux_distill/step_loss": 0.8430425636470318, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4894301470588235, "calib/avg_num_step_conf": 7.1484375, "calib/ece": 0.3860728744939271, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.012145748987854251, "calib/gap": -0.02349264705882348, "calib/mean_conf": 0.17805668016194334, "calib/mu_c": 0.1658823529411765, "calib/mu_w": 0.189375, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04117408906882591, "calib/std_conf": 0.2232595048457253, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.20123655913978494, "calib/step_q_c_n": 744.0, "calib/step_q_gap": -0.007897879165924077, "calib/step_q_w": 0.20913443830570902, "calib/step_q_w_n": 1086.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2382.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 469.6015625, "completions/mean_terminated_length": 475.16998291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.0864, "grad_norm": 0.005131955724209547, "learning_rate": 3.3055555555555558e-06, "loss": 0.1836, "num_tokens": 17506744.0, "reward": 1.0026121139526367, "reward_std": 0.19326549768447876, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5755367279052734, "rewards/format_reward_step": 0.96484375, "step": 81 }, { "aux_distill/final_loss": 0.0002871967699320521, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08434561325702816, "aux_distill/mean_u": 0.23573001576920807, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 212.375, "aux_distill/step_loss": 0.843168918043375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5748409669211196, "calib/avg_num_step_conf": 6.67578125, "calib/ece": 0.363803984063745, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.03437472646310433, "calib/mean_conf": 0.1901402390438247, "calib/mu_c": 0.20808083333333335, "calib/mu_w": 0.17370610687022903, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03792828685258964, "calib/std_conf": 0.2433397327036698, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.24587994428969362, "calib/step_q_c_n": 718.0, "calib/step_q_gap": 0.037715968507655295, "calib/step_q_w": 0.20816397578203832, "calib/step_q_w_n": 991.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 437.53515625, "completions/mean_terminated_length": 439.2510070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 125.0, "epoch": 0.08746666666666666, "grad_norm": 0.005216378252953291, "learning_rate": 3.277777777777778e-06, "loss": 0.1722, "num_tokens": 17724305.0, "reward": 1.0312541723251343, "reward_std": 0.1625767946243286, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6132895946502686, "rewards/format_reward_step": 0.98046875, "step": 82 }, { "aux_distill/final_loss": 0.013176210077972428, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07911544980015606, "aux_distill/mean_u": 0.26459626715690676, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 234.125, "aux_distill/step_loss": 0.7779782712459564, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.46085120207927227, "calib/avg_num_step_conf": 7.62109375, "calib/ece": 0.3763855421686746, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": -0.01789863547758283, "calib/mean_conf": 0.1765461847389558, "calib/mu_c": 0.1668421052631579, "calib/mu_w": 0.18474074074074073, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04755020080321285, "calib/std_conf": 0.24044996071936003, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2100665338645418, "calib/step_q_c_n": 753.0, "calib/step_q_gap": 0.011218286786077697, "calib/step_q_w": 0.1988482470784641, "calib/step_q_w_n": 1198.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2998.0, "completions/max_terminated_length": 2998.0, "completions/mean_length": 490.8984375, "completions/mean_terminated_length": 498.69049072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.08853333333333334, "grad_norm": 0.004931866656988859, "learning_rate": 3.2500000000000002e-06, "loss": 0.1023, "num_tokens": 17957239.0, "reward": 0.9939597845077515, "reward_std": 0.18009626865386963, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5816695094108582, "rewards/format_reward_step": 0.9609375, "step": 83 }, { "aux_distill/final_loss": 0.00035623072062662686, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08428628486581147, "aux_distill/mean_u": 0.2673578162871099, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 197.125, "aux_distill/step_loss": 0.8425065949559212, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5251001421372271, "calib/avg_num_step_conf": 6.2109375, "calib/ece": 0.3058167330677291, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": 0.025248740147305854, "calib/mean_conf": 0.1901195219123506, "calib/mu_c": 0.20440366972477064, "calib/mu_w": 0.1791549295774648, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030836653386454183, "calib/std_conf": 0.2363947034813479, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23518846153846154, "calib/step_q_c_n": 650.0, "calib/step_q_gap": 0.017486333878887095, "calib/step_q_w": 0.21770212765957445, "calib/step_q_w_n": 940.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2197.0, "completions/max_terminated_length": 2197.0, "completions/mean_length": 410.01953125, "completions/mean_terminated_length": 413.2480163574219, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.0896, "grad_norm": 0.006021194159984589, "learning_rate": 3.2222222222222227e-06, "loss": 0.1134, "num_tokens": 18168124.0, "reward": 1.022384762763977, "reward_std": 0.16455532610416412, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6385195255279541, "rewards/format_reward_step": 0.98046875, "step": 84 }, { "aux_distill/final_loss": 0.006193090820715952, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07979736186098307, "aux_distill/mean_u": 0.2422023146113763, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 190.5, "aux_distill/step_loss": 0.7917805155739188, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5719209558823529, "calib/avg_num_step_conf": 6.1328125, "calib/ece": 0.30544354838709675, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.028225806451612902, "calib/gap": 0.05423319327731088, "calib/mean_conf": 0.20633064516129032, "calib/mu_c": 0.23607142857142854, "calib/mu_w": 0.18183823529411766, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03008064516129032, "calib/std_conf": 0.2503913138042453, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.25409425625920473, "calib/step_q_c_n": 679.0, "calib/step_q_gap": 0.04691131574293089, "calib/step_q_w": 0.20718294051627384, "calib/step_q_w_n": 891.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 453.69140625, "completions/mean_terminated_length": 460.89288330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.09066666666666667, "grad_norm": 0.005572810769081116, "learning_rate": 3.1944444444444443e-06, "loss": 0.1582, "num_tokens": 18392093.0, "reward": 1.0132523775100708, "reward_std": 0.1999160796403885, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6280671954154968, "rewards/format_reward_step": 0.9609375, "step": 85 }, { "aux_distill/final_loss": 0.0001749628336256137, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08336643897928298, "aux_distill/mean_u": 0.24921388933849128, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 198.0, "aux_distill/step_loss": 0.8334894105792046, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4834799608993158, "calib/avg_num_step_conf": 6.1875, "calib/ece": 0.2961417322834645, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.023622047244094488, "calib/gap": -0.02200977517106542, "calib/mean_conf": 0.19858267716535433, "calib/mu_c": 0.18515151515151518, "calib/mu_w": 0.2071612903225806, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05248031496062992, "calib/std_conf": 0.24160462401532942, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.26600856164383563, "calib/step_q_c_n": 584.0, "calib/step_q_gap": 0.03908856164383562, "calib/step_q_w": 0.22692, "calib/step_q_w_n": 1000.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 431.05859375, "completions/mean_terminated_length": 431.05859375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.09173333333333333, "grad_norm": 0.005810996517539024, "learning_rate": 3.1666666666666667e-06, "loss": 0.1372, "num_tokens": 18607956.0, "reward": 1.015267252922058, "reward_std": 0.14874380826950073, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6516281366348267, "rewards/format_reward_step": 0.9921875, "step": 86 }, { "aux_distill/final_loss": 0.00023801279348845128, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07791043911129236, "aux_distill/mean_u": 0.24190785824083708, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 198.0, "aux_distill/step_loss": 0.7788663636893034, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5239746383990489, "calib/avg_num_step_conf": 6.1875, "calib/ece": 0.45493600000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.04, "calib/gap": 0.0007821147876626577, "calib/mean_conf": 0.205256, "calib/mu_c": 0.205578231292517, "calib/mu_w": 0.20479611650485435, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.036096, "calib/std_conf": 0.2592800001234187, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2335923076923077, "calib/step_q_c_n": 910.0, "calib/step_q_gap": -0.006748938598493487, "calib/step_q_w": 0.2403412462908012, "calib/step_q_w_n": 674.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2902.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 408.984375, "completions/mean_terminated_length": 410.5882568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.0928, "grad_norm": 0.0059371222741901875, "learning_rate": 3.138888888888889e-06, "loss": 0.1722, "num_tokens": 18818152.0, "reward": 1.0399489402770996, "reward_std": 0.20458626747131348, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5291163921356201, "rewards/format_reward_step": 0.97265625, "step": 87 }, { "aux_distill/final_loss": 0.00039201670324473525, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0839028840418905, "aux_distill/mean_u": 0.23952964579450506, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 192.5, "aux_distill/step_loss": 0.8386368071660399, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5390609390609391, "calib/avg_num_step_conf": 6.19921875, "calib/ece": 0.31815040322580646, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.04032258064516129, "calib/gap": 0.007915371295371287, "calib/mean_conf": 0.18644637096774194, "calib/mu_c": 0.19101047619047617, "calib/mu_w": 0.1830951048951049, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04060483870967742, "calib/std_conf": 0.2359217388068951, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.26278109028960817, "calib/step_q_c_n": 587.0, "calib/step_q_gap": 0.04397039028960817, "calib/step_q_w": 0.2188107, "calib/step_q_w_n": 1000.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2828.0, "completions/max_terminated_length": 2828.0, "completions/mean_length": 455.66796875, "completions/mean_terminated_length": 462.90081787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.09386666666666667, "grad_norm": 0.005285973194986582, "learning_rate": 3.1111111111111116e-06, "loss": 0.15, "num_tokens": 19044651.0, "reward": 1.0013043880462646, "reward_std": 0.20347905158996582, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6276086568832397, "rewards/format_reward_step": 0.96484375, "step": 88 }, { "aux_distill/final_loss": 0.00025402456117262773, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08750179770868272, "aux_distill/mean_u": 0.2729576968269954, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 186.875, "aux_distill/step_loss": 0.8747639395296574, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5010689990281827, "calib/avg_num_step_conf": 5.83984375, "calib/ece": 0.3205952380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": 0.0007210884353741898, "calib/mean_conf": 0.19472222222222224, "calib/mu_c": 0.19514285714285717, "calib/mu_w": 0.19442176870748298, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04932539682539683, "calib/std_conf": 0.2676846681632419, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.22526802218114603, "calib/step_q_c_n": 541.0, "calib/step_q_gap": 0.010194646919091499, "calib/step_q_w": 0.21507337526205453, "calib/step_q_w_n": 954.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 456.59765625, "completions/mean_terminated_length": 456.59765625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.09493333333333333, "grad_norm": 0.005344375967979431, "learning_rate": 3.0833333333333336e-06, "loss": 0.167, "num_tokens": 19270428.0, "reward": 1.0064247846603394, "reward_std": 0.16095760464668274, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6261308789253235, "rewards/format_reward_step": 0.9765625, "step": 89 }, { "aux_distill/final_loss": 0.00018680925779790414, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08290021540597081, "aux_distill/mean_u": 0.27581140384172503, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 185.875, "aux_distill/step_loss": 0.828815333545208, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5554987212276215, "calib/avg_num_step_conf": 5.9296875, "calib/ece": 0.37179203187251, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.04780876494023904, "calib/gap": 0.0645884143222506, "calib/mean_conf": 0.20812828685258966, "calib/mu_c": 0.2377205882352941, "calib/mu_w": 0.1731321739130435, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019043824701195217, "calib/std_conf": 0.2730507936137177, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.248328488372093, "calib/step_q_c_n": 688.0, "calib/step_q_gap": 0.05466559680582794, "calib/step_q_w": 0.19366289156626507, "calib/step_q_w_n": 830.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 399.02734375, "completions/mean_terminated_length": 402.1692810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.096, "grad_norm": 0.005783769767731428, "learning_rate": 3.055555555555556e-06, "loss": 0.1483, "num_tokens": 19475899.0, "reward": 1.0489718914031982, "reward_std": 0.19389018416404724, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5862250328063965, "rewards/format_reward_step": 0.98046875, "step": 90 }, { "aux_distill/final_loss": 0.00015373918563454936, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08251471724361181, "aux_distill/mean_u": 0.22730499874422605, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 188.25, "aux_distill/step_loss": 0.8249934185296297, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5268458528449549, "calib/avg_num_step_conf": 5.8828125, "calib/ece": 0.385896, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.048, "calib/gap": 0.03551953300404134, "calib/mean_conf": 0.198024, "calib/mu_c": 0.21493129770992367, "calib/mu_w": 0.17941176470588233, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.029959999999999994, "calib/std_conf": 0.2568820729907013, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2567781329923274, "calib/step_q_c_n": 782.0, "calib/step_q_gap": 0.03151570205310089, "calib/step_q_w": 0.2252624309392265, "calib/step_q_w_n": 724.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2495.0, "completions/max_terminated_length": 2495.0, "completions/mean_length": 425.76171875, "completions/mean_terminated_length": 427.431396484375, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.09706666666666666, "grad_norm": 0.005606526043266058, "learning_rate": 3.0277777777777776e-06, "loss": 0.1379, "num_tokens": 19692606.0, "reward": 1.0294326543807983, "reward_std": 0.18703590333461761, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5783966183662415, "rewards/format_reward_step": 0.96875, "step": 91 }, { "aux_distill/final_loss": 0.00023135661149353837, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08267123310361058, "aux_distill/mean_u": 0.20467404305081655, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 169.375, "aux_distill/step_loss": 0.8264809604734182, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5591839353307244, "calib/avg_num_step_conf": 5.29296875, "calib/ece": 0.29054563492063495, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.031746031746031744, "calib/gap": 0.07357448514787968, "calib/mean_conf": 0.19532738095238097, "calib/mu_c": 0.23707798165137617, "calib/mu_w": 0.1635034965034965, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.02666666666666666, "calib/std_conf": 0.2476480737194419, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.24213996247654787, "calib/step_q_c_n": 533.0, "calib/step_q_gap": -0.0040168501755202535, "calib/step_q_w": 0.24615681265206812, "calib/step_q_w_n": 822.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2905.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 416.05078125, "completions/mean_terminated_length": 416.05078125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.09813333333333334, "grad_norm": 0.005976582877337933, "learning_rate": 3e-06, "loss": 0.1585, "num_tokens": 19905835.0, "reward": 1.0285645723342896, "reward_std": 0.16882646083831787, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6547853350639343, "rewards/format_reward_step": 0.9765625, "step": 92 }, { "aux_distill/final_loss": 0.0001452214287382958, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08422124257776886, "aux_distill/mean_u": 0.2293708357468701, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 178.375, "aux_distill/step_loss": 0.8420671913772821, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5493368868583033, "calib/avg_num_step_conf": 5.578125, "calib/ece": 0.32116875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0703125, "calib/gap": 0.020842540770353385, "calib/mean_conf": 0.2377375, "calib/mu_c": 0.2501941747572815, "calib/mu_w": 0.22935163398692812, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07828124999999998, "calib/std_conf": 0.2956682947948173, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2604112149532711, "calib/step_q_c_n": 535.0, "calib/step_q_gap": -0.02375575033228322, "calib/step_q_w": 0.2841669652855543, "calib/step_q_w_n": 893.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1096.0, "completions/max_terminated_length": 1096.0, "completions/mean_length": 397.40234375, "completions/mean_terminated_length": 398.9608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.0992, "grad_norm": 0.005867833737283945, "learning_rate": 2.9722222222222225e-06, "loss": 0.1096, "num_tokens": 20113346.0, "reward": 1.0210399627685547, "reward_std": 0.17285411059856415, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6475486159324646, "rewards/format_reward_step": 0.9921875, "step": 93 }, { "aux_distill/final_loss": 0.00018924074129245128, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07676532538607717, "aux_distill/mean_u": 0.20391541947647757, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 174.75, "aux_distill/step_loss": 0.7674639923498034, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5068449940066874, "calib/avg_num_step_conf": 5.4609375, "calib/ece": 0.38861111111111113, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07539682539682539, "calib/gap": 0.0010302189136331685, "calib/mean_conf": 0.21996031746031747, "calib/mu_c": 0.22049586776859503, "calib/mu_w": 0.21946564885496186, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06420634920634921, "calib/std_conf": 0.2948546830891139, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2472169811320755, "calib/step_q_c_n": 636.0, "calib/step_q_gap": 0.0038425716832565793, "calib/step_q_w": 0.2433744094488189, "calib/step_q_w_n": 762.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2772.0, "completions/max_terminated_length": 2772.0, "completions/mean_length": 408.67578125, "completions/mean_terminated_length": 408.67578125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.10026666666666667, "grad_norm": 0.006220919080078602, "learning_rate": 2.944444444444445e-06, "loss": 0.1714, "num_tokens": 20326647.0, "reward": 1.0158402919769287, "reward_std": 0.18431319296360016, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5824617147445679, "rewards/format_reward_step": 0.9765625, "step": 94 }, { "aux_distill/final_loss": 0.00012870838327216916, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08224111946765333, "aux_distill/mean_u": 0.28012217788833976, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 172.25, "aux_distill/step_loss": 0.8222824800759554, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.511155227831348, "calib/avg_num_step_conf": 5.3828125, "calib/ece": 0.3976984126984128, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03571428571428571, "calib/gap": -0.01264511249763664, "calib/mean_conf": 0.18460317460317457, "calib/mu_c": 0.17813008130081298, "calib/mu_w": 0.19077519379844962, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.047103174603174605, "calib/std_conf": 0.2611942985159199, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22593749999999999, "calib/step_q_c_n": 640.0, "calib/step_q_gap": 0.01166920731707316, "calib/step_q_w": 0.21426829268292683, "calib/step_q_w_n": 738.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 401.68359375, "completions/mean_terminated_length": 403.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.10133333333333333, "grad_norm": 0.006113733630627394, "learning_rate": 2.916666666666667e-06, "loss": 0.117, "num_tokens": 20535606.0, "reward": 1.0196096897125244, "reward_std": 0.16846677660942078, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5743758082389832, "rewards/format_reward_step": 0.984375, "step": 95 }, { "aux_distill/final_loss": 0.0001608024745110015, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08210943453013897, "aux_distill/mean_u": 0.2503238241556181, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 159.25, "aux_distill/step_loss": 0.8209335319697857, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.546373326400624, "calib/avg_num_step_conf": 4.9921875, "calib/ece": 0.4583125490196078, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": 0.05711899779019888, "calib/mean_conf": 0.18435411764705883, "calib/mu_c": 0.20630573248407644, "calib/mu_w": 0.14918673469387755, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013490196078431368, "calib/std_conf": 0.2506256462564762, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23626459143968875, "calib/step_q_c_n": 771.0, "calib/step_q_gap": 0.05926518315566509, "calib/step_q_w": 0.17699940828402366, "calib/step_q_w_n": 507.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1516.0, "completions/max_terminated_length": 1516.0, "completions/mean_length": 367.65625, "completions/mean_terminated_length": 369.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.1024, "grad_norm": 0.0068721650168299675, "learning_rate": 2.888888888888889e-06, "loss": 0.1151, "num_tokens": 20735542.0, "reward": 1.0744065046310425, "reward_std": 0.14019350707530975, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5394378900527954, "rewards/format_reward_step": 0.99609375, "step": 96 }, { "aux_distill/final_loss": 0.00013028520993429993, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08546403562650084, "aux_distill/mean_u": 0.23556217314631908, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 175.25, "aux_distill/step_loss": 0.854510054923594, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5022513952308473, "calib/avg_num_step_conf": 5.50390625, "calib/ece": 0.3341732283464567, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": 0.003915525114155244, "calib/mean_conf": 0.19858267716535433, "calib/mu_c": 0.20083333333333334, "calib/mu_w": 0.1969178082191781, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.053779527559055115, "calib/std_conf": 0.26903362904697825, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2331340579710145, "calib/step_q_c_n": 552.0, "calib/step_q_gap": 0.009210954120372705, "calib/step_q_w": 0.2239231038506418, "calib/step_q_w_n": 857.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1909.0, "completions/max_terminated_length": 1909.0, "completions/mean_length": 378.6875, "completions/mean_terminated_length": 380.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.10346666666666667, "grad_norm": 0.006627393886446953, "learning_rate": 2.861111111111111e-06, "loss": 0.1132, "num_tokens": 20937558.0, "reward": 1.0214437246322632, "reward_std": 0.160974383354187, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6288249492645264, "rewards/format_reward_step": 0.9921875, "step": 97 }, { "aux_distill/final_loss": 0.0001625970314762526, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08227646606974304, "aux_distill/mean_u": 0.27415824672963807, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 143.375, "aux_distill/step_loss": 0.8226020485162735, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4887820512820513, "calib/avg_num_step_conf": 4.671875, "calib/ece": 0.39055199999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.06, "calib/gap": -0.011074358974358989, "calib/mean_conf": 0.214408, "calib/mu_c": 0.2090923076923077, "calib/mu_w": 0.22016666666666668, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.04247999999999999, "calib/std_conf": 0.27076296928494487, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.25492631578947367, "calib/step_q_c_n": 570.0, "calib/step_q_gap": 0.05478254582142256, "calib/step_q_w": 0.2001437699680511, "calib/step_q_w_n": 626.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 371.4921875, "completions/mean_terminated_length": 377.388916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.10453333333333334, "grad_norm": 0.00674501433968544, "learning_rate": 2.8333333333333335e-06, "loss": 0.0583, "num_tokens": 21138844.0, "reward": 1.0168352127075195, "reward_std": 0.1888578236103058, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5571080446243286, "rewards/format_reward_step": 0.96875, "step": 98 }, { "aux_distill/final_loss": 0.0002174276502273642, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0827044548932463, "aux_distill/mean_u": 0.22012309214152356, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 157.0, "aux_distill/step_loss": 0.8268271088600159, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5498447204968944, "calib/avg_num_step_conf": 4.90625, "calib/ece": 0.20948818897637797, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.06692913385826772, "calib/gap": 0.07727795031055901, "calib/mean_conf": 0.19059055118110235, "calib/mu_c": 0.24657142857142858, "calib/mu_w": 0.16929347826086957, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.06224409448818897, "calib/std_conf": 0.2712947333651442, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.3062349397590361, "calib/step_q_c_n": 332.0, "calib/step_q_gap": 0.09580203932613565, "calib/step_q_w": 0.21043290043290044, "calib/step_q_w_n": 924.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2946.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 435.6328125, "completions/mean_terminated_length": 435.6328125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.1056, "grad_norm": 0.006802734453231096, "learning_rate": 2.805555555555556e-06, "loss": 0.1235, "num_tokens": 21356166.0, "reward": 0.9909521341323853, "reward_std": 0.16204184293746948, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7319043278694153, "rewards/format_reward_step": 0.9765625, "step": 99 }, { "aux_distill/final_loss": 0.00017292160646320553, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08561063604429364, "aux_distill/mean_u": 0.27638954939157084, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 167.375, "aux_distill/step_loss": 0.8559334147721529, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5194937805557275, "calib/avg_num_step_conf": 5.265625, "calib/ece": 0.335913671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.046875, "calib/gap": 0.04397773995915588, "calib/mean_conf": 0.20025820312499998, "calib/mu_c": 0.22482389380530973, "calib/mu_w": 0.18084615384615385, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0473828125, "calib/std_conf": 0.27833485020557297, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25755390070921985, "calib/step_q_c_n": 564.0, "calib/step_q_gap": 0.0012479058112606634, "calib/step_q_w": 0.2563059948979592, "calib/step_q_w_n": 784.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 403.72265625, "completions/mean_terminated_length": 405.305908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.10666666666666667, "grad_norm": 0.006777219939976931, "learning_rate": 2.7777777777777783e-06, "loss": 0.0815, "num_tokens": 21566927.0, "reward": 1.0404518842697144, "reward_std": 0.14150357246398926, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6394974589347839, "rewards/format_reward_step": 1.0, "step": 100 }, { "aux_distill/final_loss": 0.00014713435496105376, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08331552986055613, "aux_distill/mean_u": 0.23223354301709642, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 172.25, "aux_distill/step_loss": 0.8330081449821591, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5067667654187988, "calib/avg_num_step_conf": 5.40625, "calib/ece": 0.3251880952380952, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05555555555555555, "calib/gap": -0.010078534877457634, "calib/mean_conf": 0.2034626984126984, "calib/mu_c": 0.19714361702127656, "calib/mu_w": 0.2072221518987342, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07781746031746031, "calib/std_conf": 0.2780667059783874, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2471811320754717, "calib/step_q_c_n": 477.0, "calib/step_q_gap": 0.04255268665099543, "calib/step_q_w": 0.20462844542447628, "calib/step_q_w_n": 907.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2993.0, "completions/max_terminated_length": 2993.0, "completions/mean_length": 446.56640625, "completions/mean_terminated_length": 450.0826721191406, "completions/min_length": 0.0, "completions/min_terminated_length": 122.0, "epoch": 0.10773333333333333, "grad_norm": 0.006341983564198017, "learning_rate": 2.7500000000000004e-06, "loss": 0.1127, "num_tokens": 21788240.0, "reward": 0.9958369731903076, "reward_std": 0.17155815660953522, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6440176963806152, "rewards/format_reward_step": 0.98046875, "step": 101 }, { "aux_distill/final_loss": 0.007553740155685773, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07908109948039055, "aux_distill/mean_u": 0.22337070525279093, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 160.5, "aux_distill/step_loss": 0.7832572367042303, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5951015829490215, "calib/avg_num_step_conf": 5.015625, "calib/ece": 0.39274901960784314, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.06666666666666667, "calib/gap": 0.10104025925464286, "calib/mean_conf": 0.2143098039215686, "calib/mu_c": 0.2590845070422535, "calib/mu_w": 0.15804424778761064, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.02509803921568627, "calib/std_conf": 0.2872497343843561, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2627881619937695, "calib/step_q_c_n": 642.0, "calib/step_q_gap": 0.02522570093457946, "calib/step_q_w": 0.23756246105919002, "calib/step_q_w_n": 642.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2382.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 377.078125, "completions/mean_terminated_length": 377.078125, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.1088, "grad_norm": 0.007032997440546751, "learning_rate": 2.7222222222222224e-06, "loss": 0.1412, "num_tokens": 21991468.0, "reward": 1.069368839263916, "reward_std": 0.1699509620666504, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5957688093185425, "rewards/format_reward_step": 0.98828125, "step": 102 }, { "aux_distill/final_loss": 0.009660119393060995, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.09102308715227991, "aux_distill/mean_u": 0.280279918769106, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 148.125, "aux_distill/step_loss": 0.9005707409232855, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.502552467385139, "calib/avg_num_step_conf": 4.62890625, "calib/ece": 0.40069007936507933, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": -0.027315201361315933, "calib/mean_conf": 0.20438928571428572, "calib/mu_c": 0.19040650406504067, "calib/mu_w": 0.2177217054263566, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0584920634920635, "calib/std_conf": 0.27734273551106337, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.25415204678362574, "calib/step_q_c_n": 513.0, "calib/step_q_gap": -0.03337191154970759, "calib/step_q_w": 0.28752395833333333, "calib/step_q_w_n": 672.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2869.0, "completions/max_terminated_length": 2869.0, "completions/mean_length": 463.85546875, "completions/mean_terminated_length": 463.85546875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.10986666666666667, "grad_norm": 0.006891104858368635, "learning_rate": 2.6944444444444444e-06, "loss": 0.1201, "num_tokens": 22214767.0, "reward": 1.0047416687011719, "reward_std": 0.19548439979553223, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.560264527797699, "rewards/format_reward_step": 0.96875, "step": 103 }, { "aux_distill/final_loss": 0.00011444762037626788, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08198370668105781, "aux_distill/mean_u": 0.23481959256142368, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 162.625, "aux_distill/step_loss": 0.8197226040065289, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5480793293293293, "calib/avg_num_step_conf": 5.08203125, "calib/ece": 0.35988235294117643, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.08235294117647059, "calib/gap": 0.01469782282282281, "calib/mean_conf": 0.20341176470588238, "calib/mu_c": 0.21171171171171171, "calib/mu_w": 0.1970138888888889, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.064, "calib/std_conf": 0.2972681615336423, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.28513866666666665, "calib/step_q_c_n": 525.0, "calib/step_q_gap": 0.0488757800687285, "calib/step_q_w": 0.23626288659793815, "calib/step_q_w_n": 776.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 408.1015625, "completions/mean_terminated_length": 409.7019958496094, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.11093333333333333, "grad_norm": 0.0072467573918402195, "learning_rate": 2.666666666666667e-06, "loss": 0.0866, "num_tokens": 22425921.0, "reward": 1.0171546936035156, "reward_std": 0.17794030904769897, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.6124343872070312, "rewards/format_reward_step": 0.98828125, "step": 104 }, { "aux_distill/final_loss": 0.012328775661444524, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08135199698153883, "aux_distill/mean_u": 0.21711008744151833, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 166.375, "aux_distill/step_loss": 0.8011911753565073, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5233015873015873, "calib/avg_num_step_conf": 5.203125, "calib/ece": 0.41362231075697214, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.09163346613545817, "calib/gap": 0.013755746031746036, "calib/mean_conf": 0.2204812749003984, "calib/mu_c": 0.22733174603174602, "calib/mu_w": 0.213576, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06605577689243027, "calib/std_conf": 0.31173055904450453, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2567628424657534, "calib/step_q_c_n": 584.0, "calib/step_q_gap": -0.05514491154494172, "calib/step_q_w": 0.31190775401069515, "calib/step_q_w_n": 748.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2354.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 460.84375, "completions/mean_terminated_length": 464.4724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.112, "grad_norm": 0.006027123890817165, "learning_rate": 2.6388888888888893e-06, "loss": 0.1147, "num_tokens": 22649657.0, "reward": 1.018819808959961, "reward_std": 0.21033445000648499, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5688895583152771, "rewards/format_reward_step": 0.9765625, "step": 105 }, { "aux_distill/final_loss": 0.00011433741019573063, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08694498904515058, "aux_distill/mean_u": 0.25351431589983237, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 144.75, "aux_distill/step_loss": 0.8693355321884155, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5080436565516371, "calib/avg_num_step_conf": 4.5234375, "calib/ece": 0.358015873015873, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.020511008769162842, "calib/mean_conf": 0.21603174603174605, "calib/mu_c": 0.22669421487603306, "calib/mu_w": 0.20618320610687021, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04694444444444444, "calib/std_conf": 0.2687377294801713, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29774422735346356, "calib/step_q_c_n": 563.0, "calib/step_q_gap": 0.04271901726942995, "calib/step_q_w": 0.2550252100840336, "calib/step_q_w_n": 595.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2663.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 399.9296875, "completions/mean_terminated_length": 401.4980773925781, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.11306666666666666, "grad_norm": 0.007543689571321011, "learning_rate": 2.6111111111111113e-06, "loss": 0.1226, "num_tokens": 22856623.0, "reward": 1.0349606275558472, "reward_std": 0.16685569286346436, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.60898357629776, "rewards/format_reward_step": 0.984375, "step": 106 }, { "aux_distill/final_loss": 0.015143587786269563, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08319069840945303, "aux_distill/mean_u": 0.24450310729418495, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 156.625, "aux_distill/step_loss": 0.8167633889243007, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4672549019607843, "calib/avg_num_step_conf": 4.89453125, "calib/ece": 0.437390513833992, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.07509881422924901, "calib/gap": -0.008791777777777815, "calib/mean_conf": 0.245297233201581, "calib/mu_c": 0.2418222222222222, "calib/mu_w": 0.250614, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03897233201581027, "calib/std_conf": 0.29765119935010886, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3058320113314447, "calib/step_q_c_n": 706.0, "calib/step_q_gap": 0.004860530527057205, "calib/step_q_w": 0.3009714808043875, "calib/step_q_w_n": 547.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2283.0, "completions/max_terminated_length": 2283.0, "completions/mean_length": 412.12890625, "completions/mean_terminated_length": 412.12890625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.11413333333333334, "grad_norm": 0.006644075736403465, "learning_rate": 2.5833333333333337e-06, "loss": 0.1244, "num_tokens": 23066744.0, "reward": 1.0612491369247437, "reward_std": 0.1764146387577057, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.5326545238494873, "rewards/format_reward_step": 0.98828125, "step": 107 }, { "aux_distill/final_loss": 0.00011162566943312413, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07747206592466682, "aux_distill/mean_u": 0.22652066254173892, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 174.25, "aux_distill/step_loss": 0.7746090218424797, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5271950271950272, "calib/avg_num_step_conf": 5.4453125, "calib/ece": 0.4207996078431372, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.10588235294117647, "calib/gap": 0.06467196969696962, "calib/mean_conf": 0.25653372549019604, "calib/mu_c": 0.2816416666666666, "calib/mu_w": 0.216969696969697, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.032784313725490205, "calib/std_conf": 0.3157732207471411, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.35206224611708486, "calib/step_q_c_n": 837.0, "calib/step_q_gap": 0.03558109710451751, "calib/step_q_w": 0.31648114901256735, "calib/step_q_w_n": 557.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1422.0, "completions/max_terminated_length": 1422.0, "completions/mean_length": 450.72265625, "completions/mean_terminated_length": 452.490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1152, "grad_norm": 0.006208624690771103, "learning_rate": 2.5555555555555557e-06, "loss": 0.0493, "num_tokens": 23285361.0, "reward": 1.069959044456482, "reward_std": 0.20371109247207642, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.5500743389129639, "rewards/format_reward_step": 0.98046875, "step": 108 }, { "aux_distill/final_loss": 0.009648739696444864, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0812309457687661, "aux_distill/mean_u": 0.21662739191092542, "aux_distill/n_active_final_tok": 30.375, "aux_distill/n_active_tok": 167.875, "aux_distill/step_loss": 0.8026606999337673, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5209545214172395, "calib/avg_num_step_conf": 5.31640625, "calib/ece": 0.3786040650406505, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.11788617886178862, "calib/gap": 0.0037177154944474933, "calib/mean_conf": 0.2769243902439025, "calib/mu_c": 0.2787983606557377, "calib/mu_w": 0.2750806451612902, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.07979674796747968, "calib/std_conf": 0.3287158839057603, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.32818075709779176, "calib/step_q_c_n": 634.0, "calib/step_q_gap": 0.06785063330136809, "calib/step_q_w": 0.2603301237964237, "calib/step_q_w_n": 727.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2738.0, "completions/max_terminated_length": 2738.0, "completions/mean_length": 478.3203125, "completions/mean_terminated_length": 487.8486328125, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.11626666666666667, "grad_norm": 0.007237493991851807, "learning_rate": 2.5277777777777778e-06, "loss": 0.1332, "num_tokens": 23512411.0, "reward": 0.9953612089157104, "reward_std": 0.25147342681884766, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5649412274360657, "rewards/format_reward_step": 0.94921875, "step": 109 }, { "aux_distill/final_loss": 0.009435338434059304, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08010290749371052, "aux_distill/mean_u": 0.21185826195867102, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 144.25, "aux_distill/step_loss": 0.7915937229990959, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5496252261566296, "calib/avg_num_step_conf": 4.6796875, "calib/ece": 0.2886376984126984, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.09126984126984126, "calib/gap": 0.062404380976996576, "calib/mean_conf": 0.2562035714285714, "calib/mu_c": 0.2923584905660377, "calib/mu_w": 0.2299541095890411, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06210317460317461, "calib/std_conf": 0.31680728110360845, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.3212387387387387, "calib/step_q_c_n": 444.0, "calib/step_q_gap": 0.0304860862188448, "calib/step_q_w": 0.2907526525198939, "calib/step_q_w_n": 754.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2423.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 445.17578125, "completions/mean_terminated_length": 446.9216003417969, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.11733333333333333, "grad_norm": 0.006269925739616156, "learning_rate": 2.5e-06, "loss": 0.097, "num_tokens": 23731296.0, "reward": 1.017866849899292, "reward_std": 0.2148369401693344, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6451085805892944, "rewards/format_reward_step": 0.9765625, "step": 110 }, { "aux_distill/final_loss": 0.00016764799386237428, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07741684489883482, "aux_distill/mean_u": 0.24787259585562457, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 148.125, "aux_distill/step_loss": 0.7740007806569338, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48793092582088615, "calib/avg_num_step_conf": 4.6328125, "calib/ece": 0.38767103174603174, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.06349206349206349, "calib/gap": -0.0182864624692759, "calib/mean_conf": 0.23494801587301586, "calib/mu_c": 0.226022480620155, "calib/mu_w": 0.2443089430894309, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.05535714285714284, "calib/std_conf": 0.2902151359166349, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.28327626811594203, "calib/step_q_c_n": 552.0, "calib/step_q_gap": 0.0021421987153111277, "calib/step_q_w": 0.2811340694006309, "calib/step_q_w_n": 634.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2987.0, "completions/max_terminated_length": 2987.0, "completions/mean_length": 511.796875, "completions/mean_terminated_length": 511.796875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.1184, "grad_norm": 0.005823335610330105, "learning_rate": 2.4722222222222226e-06, "loss": 0.1564, "num_tokens": 23969724.0, "reward": 1.0191161632537842, "reward_std": 0.20448820292949677, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5616699457168579, "rewards/format_reward_step": 0.97265625, "step": 111 }, { "aux_distill/final_loss": 0.00019943361803598236, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0781205742387101, "aux_distill/mean_u": 0.26338176908493466, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 154.125, "aux_distill/step_loss": 0.7810062952339649, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5561535011852137, "calib/avg_num_step_conf": 4.921875, "calib/ece": 0.33006480000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.056, "calib/gap": 0.07037543724774167, "calib/mean_conf": 0.20801519999999998, "calib/mu_c": 0.2443289256198347, "calib/mu_w": 0.17395348837209304, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.02704, "calib/std_conf": 0.2771900833885657, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.2894207272727273, "calib/step_q_c_n": 550.0, "calib/step_q_gap": 0.060798614596670936, "calib/step_q_w": 0.22862211267605634, "calib/step_q_w_n": 710.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2891.0, "completions/max_terminated_length": 2891.0, "completions/mean_length": 485.859375, "completions/mean_terminated_length": 489.6850280761719, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.11946666666666667, "grad_norm": 0.006966287270188332, "learning_rate": 2.4444444444444447e-06, "loss": 0.1186, "num_tokens": 24202024.0, "reward": 1.0294950008392334, "reward_std": 0.19632869958877563, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6136775016784668, "rewards/format_reward_step": 0.97265625, "step": 112 }, { "aux_distill/final_loss": 0.006108010040748013, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07722375844605267, "aux_distill/mean_u": 0.22336582107667688, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 167.5, "aux_distill/step_loss": 0.766129563562572, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4713377882599581, "calib/avg_num_step_conf": 5.43359375, "calib/ece": 0.442466, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.068, "calib/gap": -0.017289635744234783, "calib/mean_conf": 0.222494, "calib/mu_c": 0.21516319444444443, "calib/mu_w": 0.2324528301886792, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04447999999999999, "calib/std_conf": 0.2998736883489447, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2770047493403694, "calib/step_q_c_n": 758.0, "calib/step_q_gap": 0.02510901474321303, "calib/step_q_w": 0.25189573459715636, "calib/step_q_w_n": 633.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 467.87890625, "completions/mean_terminated_length": 473.4269104003906, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.12053333333333334, "grad_norm": 0.006655262317508459, "learning_rate": 2.4166666666666667e-06, "loss": 0.1486, "num_tokens": 24427001.0, "reward": 1.0256073474884033, "reward_std": 0.21474526822566986, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5160583853721619, "rewards/format_reward_step": 0.97265625, "step": 113 }, { "aux_distill/final_loss": 0.00025458406253164867, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0691154002561234, "aux_distill/mean_u": 0.19843236018875485, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 168.625, "aux_distill/step_loss": 0.6908994019031525, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47466216216216217, "calib/avg_num_step_conf": 5.26953125, "calib/ece": 0.4669444444444444, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.04365079365079365, "calib/gap": -0.017780665280665242, "calib/mean_conf": 0.18686507936507937, "calib/mu_c": 0.17952702702702705, "calib/mu_w": 0.1973076923076923, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.03325396825396826, "calib/std_conf": 0.27517976384782167, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2416880308880309, "calib/step_q_c_n": 777.0, "calib/step_q_gap": -0.02471057051057049, "calib/step_q_w": 0.2663986013986014, "calib/step_q_w_n": 572.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 466.74609375, "completions/mean_terminated_length": 470.4212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1216, "grad_norm": 0.007292897906154394, "learning_rate": 2.388888888888889e-06, "loss": 0.0881, "num_tokens": 24651512.0, "reward": 1.031526803970337, "reward_std": 0.16938289999961853, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5044597387313843, "rewards/format_reward_step": 0.98046875, "step": 114 }, { "aux_distill/final_loss": 0.00021639067404066736, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06915236706845462, "aux_distill/mean_u": 0.1697514341712585, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 178.25, "aux_distill/step_loss": 0.6913072746247053, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5266768292682927, "calib/avg_num_step_conf": 5.70703125, "calib/ece": 0.3867912350597611, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.05976095617529881, "calib/gap": 0.008917117632113891, "calib/mean_conf": 0.20269083665338644, "calib/mu_c": 0.20723821138211387, "calib/mu_w": 0.19832109374999998, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04972111553784861, "calib/std_conf": 0.28575867224050194, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.28042391653290527, "calib/step_q_c_n": 623.0, "calib/step_q_gap": 0.03259336760689094, "calib/step_q_w": 0.24783054892601433, "calib/step_q_w_n": 838.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2338.0, "completions/max_terminated_length": 2338.0, "completions/mean_length": 481.078125, "completions/mean_terminated_length": 486.7826232910156, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.12266666666666666, "grad_norm": 0.006700317841023207, "learning_rate": 2.361111111111111e-06, "loss": 0.0762, "num_tokens": 24879932.0, "reward": 1.0177994966506958, "reward_std": 0.20145021378993988, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5785678029060364, "rewards/format_reward_step": 0.9765625, "step": 115 }, { "aux_distill/final_loss": 0.0003319979881553081, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07306196971330792, "aux_distill/mean_u": 0.1895094989100424, "aux_distill/n_active_final_tok": 30.375, "aux_distill/n_active_tok": 196.75, "aux_distill/step_loss": 0.7302876887843013, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5061617968594713, "calib/avg_num_step_conf": 6.578125, "calib/ece": 0.4580081300813008, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.07723577235772358, "calib/gap": -0.031216457960644012, "calib/mean_conf": 0.18004065040650405, "calib/mu_c": 0.16519379844961243, "calib/mu_w": 0.19641025641025645, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.05682926829268293, "calib/std_conf": 0.2798409797676183, "calib/step_conf_rate": 0.9765625, "calib/step_q_c": 0.24775482093663911, "calib/step_q_c_n": 726.0, "calib/step_q_gap": -0.035533279272129153, "calib/step_q_w": 0.28328810020876827, "calib/step_q_w_n": 958.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 3057.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 557.6640625, "completions/mean_terminated_length": 568.77294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.12373333333333333, "grad_norm": 0.006594009697437286, "learning_rate": 2.3333333333333336e-06, "loss": 0.092, "num_tokens": 25127214.0, "reward": 0.980388879776001, "reward_std": 0.2139553725719452, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.507652759552002, "rewards/format_reward_step": 0.94921875, "step": 116 }, { "aux_distill/final_loss": 0.0018335038339500898, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0666623511351645, "aux_distill/mean_u": 0.16838721496509335, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 212.375, "aux_distill/step_loss": 0.6647899970412254, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5332070707070707, "calib/avg_num_step_conf": 6.63671875, "calib/ece": 0.34901574803149604, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.034113636363636374, "calib/mean_conf": 0.1660236220472441, "calib/mu_c": 0.18536363636363637, "calib/mu_w": 0.15125, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04098425196850393, "calib/std_conf": 0.2629981876099023, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2559718309859155, "calib/step_q_c_n": 710.0, "calib/step_q_gap": 0.03504159842777599, "calib/step_q_w": 0.22093023255813954, "calib/step_q_w_n": 989.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2143.0, "completions/max_terminated_length": 2143.0, "completions/mean_length": 551.36328125, "completions/mean_terminated_length": 553.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.1248, "grad_norm": 0.00625663623213768, "learning_rate": 2.305555555555556e-06, "loss": 0.0713, "num_tokens": 25374963.0, "reward": 1.0238478183746338, "reward_std": 0.1417831927537918, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6258206963539124, "rewards/format_reward_step": 0.9921875, "step": 117 }, { "aux_distill/final_loss": 0.0007958354216270891, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.05608076159842312, "aux_distill/mean_u": 0.13667836461954033, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 286.0, "aux_distill/step_loss": 0.5600117654539645, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.537762467191601, "calib/avg_num_step_conf": 9.67578125, "calib/ece": 0.4103603238866397, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.08906882591093117, "calib/gap": 0.03999035433070866, "calib/mean_conf": 0.19773684210526313, "calib/mu_c": 0.2171653543307087, "calib/mu_w": 0.17717500000000003, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.046963562753036446, "calib/std_conf": 0.29979166784326466, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2636060606060606, "calib/step_q_c_n": 990.0, "calib/step_q_gap": 0.08017633632899271, "calib/step_q_w": 0.1834297242770679, "calib/step_q_w_n": 1487.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2800.0, "completions/max_terminated_length": 2800.0, "completions/mean_length": 613.80078125, "completions/mean_terminated_length": 621.0791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.12586666666666665, "grad_norm": 0.009130639024078846, "learning_rate": 2.277777777777778e-06, "loss": 0.1473, "num_tokens": 25636104.0, "reward": 1.0064516067504883, "reward_std": 0.21105365455150604, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.555871844291687, "rewards/format_reward_step": 0.9609375, "step": 118 }, { "aux_distill/final_loss": 0.000528632641362492, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06704626954160631, "aux_distill/mean_u": 0.19759312360890224, "aux_distill/n_active_final_tok": 30.625, "aux_distill/n_active_tok": 214.75, "aux_distill/step_loss": 0.6699340520426631, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5888421474358975, "calib/avg_num_step_conf": 7.40625, "calib/ece": 0.3749795918367347, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.044897959183673466, "calib/gap": 0.06137887286324792, "calib/mean_conf": 0.15579591836734694, "calib/mu_c": 0.1878632478632479, "calib/mu_w": 0.12648437499999998, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.02661224489795918, "calib/std_conf": 0.24266858054420698, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.23157199471598416, "calib/step_q_c_n": 757.0, "calib/step_q_gap": 0.06798990516374537, "calib/step_q_w": 0.1635820895522388, "calib/step_q_w_n": 1139.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2858.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 610.8046875, "completions/mean_terminated_length": 622.9721069335938, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.12693333333333334, "grad_norm": 0.008736925199627876, "learning_rate": 2.25e-06, "loss": 0.1025, "num_tokens": 25897534.0, "reward": 0.9991916418075562, "reward_std": 0.20187215507030487, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5882269144058228, "rewards/format_reward_step": 0.953125, "step": 119 }, { "aux_distill/final_loss": 0.0005055578319570486, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.057472184766083956, "aux_distill/mean_u": 0.15117055273225058, "aux_distill/n_active_final_tok": 29.375, "aux_distill/n_active_tok": 252.125, "aux_distill/step_loss": 0.5742162819951773, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.5447395376060872, "calib/avg_num_step_conf": 9.30859375, "calib/ece": 0.48006355932203393, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.05084745762711865, "calib/gap": 0.022365378987415863, "calib/mean_conf": 0.1495127118644068, "calib/mu_c": 0.15917910447761194, "calib/mu_w": 0.13681372549019608, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.030889830508474585, "calib/std_conf": 0.25454153670119367, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.23720441988950275, "calib/step_q_c_n": 905.0, "calib/step_q_gap": 0.11476856062021992, "calib/step_q_w": 0.12243585926928283, "calib/step_q_w_n": 1478.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 593.75390625, "completions/mean_terminated_length": 625.5184936523438, "completions/min_length": 0.0, "completions/min_terminated_length": 238.0, "epoch": 0.128, "grad_norm": 0.005892501212656498, "learning_rate": 2.222222222222222e-06, "loss": 0.0633, "num_tokens": 26156223.0, "reward": 0.9611204266548157, "reward_std": 0.26143747568130493, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.4808346629142761, "rewards/format_reward_step": 0.91796875, "step": 120 }, { "aux_distill/final_loss": 0.001512952457233041, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06258805538527668, "aux_distill/mean_u": 0.21936762366366194, "aux_distill/n_active_final_tok": 29.125, "aux_distill/n_active_tok": 303.0, "aux_distill/step_loss": 0.6243675984442234, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.5526315789473685, "calib/avg_num_step_conf": 10.44921875, "calib/ece": 0.4203800847457627, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.0635593220338983, "calib/gap": 0.026903523152142617, "calib/mean_conf": 0.17241652542372882, "calib/mu_c": 0.18541229508196722, "calib/mu_w": 0.1585087719298246, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.03792372881355932, "calib/std_conf": 0.26805675424823155, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.24635463071512306, "calib/step_q_c_n": 853.0, "calib/step_q_gap": 0.04392323664267517, "calib/step_q_w": 0.2024313940724479, "calib/step_q_w_n": 1822.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 690.86328125, "completions/mean_terminated_length": 718.9471435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.12906666666666666, "grad_norm": 0.005467615555971861, "learning_rate": 2.1944444444444445e-06, "loss": 0.1284, "num_tokens": 26438140.0, "reward": 0.9531474113464355, "reward_std": 0.2858285903930664, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5234823226928711, "rewards/format_reward_step": 0.90625, "step": 121 }, { "aux_distill/final_loss": 0.0019614799234659586, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06464390846667811, "aux_distill/mean_u": 0.2041460817987188, "aux_distill/n_active_final_tok": 29.75, "aux_distill/n_active_tok": 266.5, "aux_distill/step_loss": 0.6444775881245732, "calib/answer_extract_rate": 0.921875, "calib/auroc": 0.5519595359366157, "calib/avg_num_step_conf": 10.64453125, "calib/ece": 0.4018483193277311, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.058823529411764705, "calib/gap": 0.05940233446519527, "calib/mean_conf": 0.17437016806722688, "calib/mu_c": 0.20282338709677422, "calib/mu_w": 0.14342105263157895, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.027605042016806726, "calib/std_conf": 0.26916321153937484, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.25753532863849765, "calib/step_q_c_n": 852.0, "calib/step_q_gap": 0.09895524321404492, "calib/step_q_w": 0.15858008542445273, "calib/step_q_w_n": 1873.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 588.94140625, "completions/mean_terminated_length": 625.5975341796875, "completions/min_length": 0.0, "completions/min_terminated_length": 266.0, "epoch": 0.13013333333333332, "grad_norm": 0.0055676959455013275, "learning_rate": 2.166666666666667e-06, "loss": 0.0066, "num_tokens": 26696253.0, "reward": 0.9706401824951172, "reward_std": 0.26492780447006226, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5389367341995239, "rewards/format_reward_step": 0.91796875, "step": 122 }, { "aux_distill/final_loss": 0.0024005121531445184, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06945573759730905, "aux_distill/mean_u": 0.2149112364506133, "aux_distill/n_active_final_tok": 30.125, "aux_distill/n_active_tok": 231.0, "aux_distill/step_loss": 0.6921568466350436, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.4637264957264957, "calib/avg_num_step_conf": 7.9921875, "calib/ece": 0.39942148760330587, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.06611570247933884, "calib/gap": -0.017327863247863284, "calib/mean_conf": 0.20570247933884298, "calib/mu_c": 0.19675213675213674, "calib/mu_w": 0.21408000000000002, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.060826446280991736, "calib/std_conf": 0.2879776347097573, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.24959493670886077, "calib/step_q_c_n": 790.0, "calib/step_q_gap": -0.012124808514069135, "calib/step_q_w": 0.2617197452229299, "calib/step_q_w_n": 1256.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2603.0, "completions/max_terminated_length": 2603.0, "completions/mean_length": 612.5390625, "completions/mean_terminated_length": 642.6638793945312, "completions/min_length": 0.0, "completions/min_terminated_length": 273.0, "epoch": 0.1312, "grad_norm": 0.005516073666512966, "learning_rate": 2.138888888888889e-06, "loss": 0.0124, "num_tokens": 26958351.0, "reward": 0.9662868976593018, "reward_std": 0.23128430545330048, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5419487953186035, "rewards/format_reward_step": 0.93359375, "step": 123 }, { "aux_distill/final_loss": 0.0008837626273816568, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06087582663167268, "aux_distill/mean_u": 0.1862628107519184, "aux_distill/n_active_final_tok": 29.875, "aux_distill/n_active_tok": 253.125, "aux_distill/step_loss": 0.6078744931146502, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.5328686720469552, "calib/avg_num_step_conf": 8.91015625, "calib/ece": 0.4679916317991631, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.08786610878661087, "calib/gap": 0.026069699192956597, "calib/mean_conf": 0.21953974895397488, "calib/mu_c": 0.2297931034482758, "calib/mu_w": 0.2037234042553192, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.040418410041841005, "calib/std_conf": 0.30176918617622306, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.26054347826086954, "calib/step_q_c_n": 1012.0, "calib/step_q_gap": 0.05541250899373007, "calib/step_q_w": 0.20513096926713947, "calib/step_q_w_n": 1269.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 628.9921875, "completions/mean_terminated_length": 646.6746826171875, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.13226666666666667, "grad_norm": 0.005148047115653753, "learning_rate": 2.1111111111111114e-06, "loss": 0.1321, "num_tokens": 27226189.0, "reward": 0.9987428188323975, "reward_std": 0.2690247893333435, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.49748554825782776, "rewards/format_reward_step": 0.93359375, "step": 124 }, { "aux_distill/final_loss": 0.0019698337684985745, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.058478535269387066, "aux_distill/mean_u": 0.1786380818921242, "aux_distill/n_active_final_tok": 29.5, "aux_distill/n_active_tok": 246.0, "aux_distill/step_loss": 0.5828155069611967, "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.5291393559418638, "calib/avg_num_step_conf": 8.8515625, "calib/ece": 0.3565527426160337, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.08438818565400844, "calib/gap": 0.03483884297520659, "calib/mean_conf": 0.22471308016877636, "calib/mu_c": 0.2425, "calib/mu_w": 0.2076611570247934, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0459071729957806, "calib/std_conf": 0.29503832611974684, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.294019728729963, "calib/step_q_c_n": 811.0, "calib/step_q_gap": 0.045377117046114246, "calib/step_q_w": 0.24864261168384877, "calib/step_q_w_n": 1455.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2669.0, "completions/max_terminated_length": 2669.0, "completions/mean_length": 652.390625, "completions/mean_terminated_length": 676.1619873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 237.0, "epoch": 0.13333333333333333, "grad_norm": 0.005496951751410961, "learning_rate": 2.0833333333333334e-06, "loss": 0.1146, "num_tokens": 27498009.0, "reward": 0.9719964265823364, "reward_std": 0.24279111623764038, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5650866031646729, "rewards/format_reward_step": 0.92578125, "step": 125 }, { "aux_distill/final_loss": 0.0008635271465209371, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06402088113827631, "aux_distill/mean_u": 0.20865287611760336, "aux_distill/n_active_final_tok": 29.625, "aux_distill/n_active_tok": 265.0, "aux_distill/step_loss": 0.6393452696502209, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.5258731290092659, "calib/avg_num_step_conf": 9.03515625, "calib/ece": 0.3979261603375528, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.10970464135021098, "calib/gap": 0.014444526015680637, "calib/mean_conf": 0.26207383966244724, "calib/mu_c": 0.26908278688524584, "calib/mu_w": 0.2546382608695652, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07261603375527426, "calib/std_conf": 0.3239697482738927, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3166586538461538, "calib/step_q_c_n": 936.0, "calib/step_q_gap": -0.021296320736271745, "calib/step_q_w": 0.33795497458242557, "calib/step_q_w_n": 1377.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 616.87890625, "completions/mean_terminated_length": 641.9552612304688, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.1344, "grad_norm": 0.005289388354867697, "learning_rate": 2.0555555555555555e-06, "loss": 0.0427, "num_tokens": 27761394.0, "reward": 0.9755932092666626, "reward_std": 0.2762099504470825, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.54493647813797, "rewards/format_reward_step": 0.92578125, "step": 126 }, { "aux_distill/final_loss": 0.0005444334367439296, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06492951465770602, "aux_distill/mean_u": 0.18184597767731114, "aux_distill/n_active_final_tok": 30.25, "aux_distill/n_active_tok": 208.25, "aux_distill/step_loss": 0.6487507000565529, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5325573433755564, "calib/avg_num_step_conf": 7.95703125, "calib/ece": 0.329253305785124, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.07851239669421488, "calib/gap": 0.03287363916466965, "calib/mean_conf": 0.25405247933884295, "calib/mu_c": 0.271304347826087, "calib/mu_w": 0.23843070866141733, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.054049586776859504, "calib/std_conf": 0.29892154795647613, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.31726683937823835, "calib/step_q_c_n": 772.0, "calib/step_q_gap": 0.0903801200106494, "calib/step_q_w": 0.22688671936758895, "calib/step_q_w_n": 1265.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 598.4921875, "completions/mean_terminated_length": 617.79833984375, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.13546666666666668, "grad_norm": 0.005467047914862633, "learning_rate": 2.027777777777778e-06, "loss": 0.0484, "num_tokens": 28018280.0, "reward": 0.9944472312927246, "reward_std": 0.24159783124923706, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5943632125854492, "rewards/format_reward_step": 0.9453125, "step": 127 }, { "aux_distill/final_loss": 0.0007024324584108399, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06789599265903234, "aux_distill/mean_u": 0.2013985319393502, "aux_distill/n_active_final_tok": 29.375, "aux_distill/n_active_tok": 201.125, "aux_distill/step_loss": 0.678257486782968, "calib/answer_extract_rate": 0.91796875, "calib/auroc": 0.587270341207349, "calib/avg_num_step_conf": 6.7578125, "calib/ece": 0.3896170212765958, "calib/final_conf_rate": 0.91796875, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.10638297872340426, "calib/gap": 0.05565835520559939, "calib/mean_conf": 0.266468085106383, "calib/mu_c": 0.2920472440944883, "calib/mu_w": 0.2363888888888889, "calib/nonempty_final_conf_rate": 0.91796875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.05782978723404256, "calib/std_conf": 0.31932221111810316, "calib/step_conf_rate": 0.96484375, "calib/step_q_c": 0.3081908302354399, "calib/step_q_c_n": 807.0, "calib/step_q_gap": 0.05836417801442151, "calib/step_q_w": 0.2498266522210184, "calib/step_q_w_n": 923.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 604.89453125, "completions/mean_terminated_length": 626.9352416992188, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.13653333333333334, "grad_norm": 0.005871245171874762, "learning_rate": 2.0000000000000003e-06, "loss": 0.0333, "num_tokens": 28279797.0, "reward": 0.9795734286308289, "reward_std": 0.2901492416858673, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5489906072616577, "rewards/format_reward_step": 0.9140625, "step": 128 }, { "aux_distill/final_loss": 0.00048586055117993965, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06084221811033785, "aux_distill/mean_u": 0.18788175666293686, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 211.75, "aux_distill/step_loss": 0.6079363012686372, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.49360562437227984, "calib/avg_num_step_conf": 7.0625, "calib/ece": 0.42291088709677416, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.06451612903225806, "calib/gap": 0.0068874121191830895, "calib/mean_conf": 0.24950846774193544, "calib/mu_c": 0.25236896551724136, "calib/mu_w": 0.24548155339805827, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.04387096774193548, "calib/std_conf": 0.29721459264565586, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.28531420824295006, "calib/step_q_c_n": 922.0, "calib/step_q_gap": 0.027849422689902648, "calib/step_q_w": 0.2574647855530474, "calib/step_q_w_n": 886.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2926.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 561.84765625, "completions/mean_terminated_length": 566.2716674804688, "completions/min_length": 0.0, "completions/min_terminated_length": 223.0, "epoch": 0.1376, "grad_norm": 0.005904473830014467, "learning_rate": 1.9722222222222224e-06, "loss": 0.1375, "num_tokens": 28526014.0, "reward": 1.0348446369171143, "reward_std": 0.23667702078819275, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5384392142295837, "rewards/format_reward_step": 0.96484375, "step": 129 }, { "aux_distill/final_loss": 0.014214775796517642, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06426420679781586, "aux_distill/mean_u": 0.19287301748504312, "aux_distill/n_active_final_tok": 30.375, "aux_distill/n_active_tok": 200.375, "aux_distill/step_loss": 0.6284272773191333, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.4567132867132867, "calib/avg_num_step_conf": 6.5390625, "calib/ece": 0.47259259259259256, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.09465020576131687, "calib/gap": -0.03296993006993004, "calib/mean_conf": 0.25349794238683127, "calib/mu_c": 0.23993006993006993, "calib/mu_w": 0.2729, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0688065843621399, "calib/std_conf": 0.3175233813948065, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.30043927648578816, "calib/step_q_c_n": 774.0, "calib/step_q_gap": 0.04557260981912148, "calib/step_q_w": 0.2548666666666667, "calib/step_q_w_n": 900.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2974.0, "completions/max_terminated_length": 2974.0, "completions/mean_length": 542.94921875, "completions/mean_terminated_length": 558.2128295898438, "completions/min_length": 0.0, "completions/min_terminated_length": 200.0, "epoch": 0.13866666666666666, "grad_norm": 0.005810092203319073, "learning_rate": 1.944444444444445e-06, "loss": 0.0745, "num_tokens": 28770297.0, "reward": 1.001173973083496, "reward_std": 0.24746762216091156, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.49844178557395935, "rewards/format_reward_step": 0.9453125, "step": 130 }, { "aux_distill/final_loss": 0.016351521228671118, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06701393506955355, "aux_distill/mean_u": 0.15899207917983457, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 188.0, "aux_distill/step_loss": 0.6537878112867475, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.49050849780701755, "calib/avg_num_step_conf": 6.28125, "calib/ece": 0.30870967741935484, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.07661290322580645, "calib/gap": 0.003969298245614011, "calib/mean_conf": 0.21548387096774194, "calib/mu_c": 0.21791666666666668, "calib/mu_w": 0.21394736842105266, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.06854838709677419, "calib/std_conf": 0.2861559884745632, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2709739130434783, "calib/step_q_c_n": 575.0, "calib/step_q_gap": 0.04122560713834761, "calib/step_q_w": 0.22974830590513068, "calib/step_q_w_n": 1033.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2437.0, "completions/max_terminated_length": 2437.0, "completions/mean_length": 562.078125, "completions/mean_terminated_length": 571.0000610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 256.0, "epoch": 0.13973333333333332, "grad_norm": 0.006515398155897856, "learning_rate": 1.916666666666667e-06, "loss": 0.0634, "num_tokens": 29020397.0, "reward": 0.9808332324028015, "reward_std": 0.2079819142818451, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6257289052009583, "rewards/format_reward_step": 0.9609375, "step": 131 }, { "aux_distill/final_loss": 0.00035071817535481387, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06427617382723838, "aux_distill/mean_u": 0.220794219787013, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 181.125, "aux_distill/step_loss": 0.6424110066145658, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.516559829059829, "calib/avg_num_step_conf": 6.02734375, "calib/ece": 0.4125000000000001, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.09274193548387097, "calib/gap": 0.03446047008547004, "calib/mean_conf": 0.23145161290322577, "calib/mu_c": 0.24590277777777775, "calib/mu_w": 0.2114423076923077, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.03165322580645162, "calib/std_conf": 0.30537046128666534, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2805757196495619, "calib/step_q_c_n": 799.0, "calib/step_q_gap": 0.034769268036658696, "calib/step_q_w": 0.2458064516129032, "calib/step_q_w_n": 744.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2455.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 536.46875, "completions/mean_terminated_length": 551.5501708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.1408, "grad_norm": 0.006517018191516399, "learning_rate": 1.888888888888889e-06, "loss": 0.0563, "num_tokens": 29263325.0, "reward": 1.0359539985656738, "reward_std": 0.19730764627456665, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5406578183174133, "rewards/format_reward_step": 0.96875, "step": 132 }, { "aux_distill/final_loss": 0.0002883227602978877, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0747068403288722, "aux_distill/mean_u": 0.22356829570609857, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 194.25, "aux_distill/step_loss": 0.7467800760641694, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5050087108013938, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.30756479999999997, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.072, "calib/gap": -0.041652264808362316, "calib/mean_conf": 0.20579519999999998, "calib/mu_c": 0.1778048780487805, "calib/mu_w": 0.21945714285714282, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.09268, "calib/std_conf": 0.28980485402587725, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.22513432835820893, "calib/step_q_c_n": 469.0, "calib/step_q_gap": -0.058855662624658495, "calib/step_q_w": 0.2839899909828674, "calib/step_q_w_n": 1109.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2769.0, "completions/max_terminated_length": 2769.0, "completions/mean_length": 654.12890625, "completions/mean_terminated_length": 656.6941528320312, "completions/min_length": 0.0, "completions/min_terminated_length": 243.0, "epoch": 0.14186666666666667, "grad_norm": 0.005605577025562525, "learning_rate": 1.8611111111111113e-06, "loss": 0.2037, "num_tokens": 29537126.0, "reward": 0.969298779964447, "reward_std": 0.2005949318408966, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.645628809928894, "rewards/format_reward_step": 0.97265625, "step": 133 }, { "aux_distill/final_loss": 0.0003131369194306899, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06677349738311023, "aux_distill/mean_u": 0.20721821418510009, "aux_distill/n_active_final_tok": 30.0, "aux_distill/n_active_tok": 173.875, "aux_distill/step_loss": 0.6674218215048313, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5016752136752136, "calib/avg_num_step_conf": 6.07421875, "calib/ece": 0.423140082644628, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.06198347107438017, "calib/gap": -0.010135268376068401, "calib/mean_conf": 0.18314090909090908, "calib/mu_c": 0.1782408, "calib/mu_w": 0.1883760683760684, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.04487603305785123, "calib/std_conf": 0.27150315132183267, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.23020534591194966, "calib/step_q_c_n": 636.0, "calib/step_q_gap": 0.04792025342011069, "calib/step_q_w": 0.18228509249183897, "calib/step_q_w_n": 919.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 3042.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 621.16796875, "completions/mean_terminated_length": 641.2056274414062, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.14293333333333333, "grad_norm": 0.005336909554898739, "learning_rate": 1.8333333333333333e-06, "loss": 0.0784, "num_tokens": 29805097.0, "reward": 0.9763469099998474, "reward_std": 0.22861750423908234, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5269124507904053, "rewards/format_reward_step": 0.9375, "step": 134 }, { "aux_distill/final_loss": 0.0003893321477335121, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07088062487309799, "aux_distill/mean_u": 0.19776091224026507, "aux_distill/n_active_final_tok": 30.5, "aux_distill/n_active_tok": 182.625, "aux_distill/step_loss": 0.7084169033914804, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.48846875838026277, "calib/avg_num_step_conf": 5.80078125, "calib/ece": 0.40654489795918375, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.053061224489795916, "calib/gap": -0.034601903995709254, "calib/mean_conf": 0.15843469387755105, "calib/mu_c": 0.1397920353982301, "calib/mu_w": 0.17439393939393935, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.05187755102040817, "calib/std_conf": 0.25144952798298403, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.1987861952861953, "calib/step_q_c_n": 594.0, "calib/step_q_gap": -0.020565319865319842, "calib/step_q_w": 0.21935151515151513, "calib/step_q_w_n": 891.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2518.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 609.62890625, "completions/mean_terminated_length": 624.260009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.144, "grad_norm": 0.006061874330043793, "learning_rate": 1.8055555555555557e-06, "loss": 0.1013, "num_tokens": 30067042.0, "reward": 0.9705733060836792, "reward_std": 0.2059599757194519, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5505216121673584, "rewards/format_reward_step": 0.94921875, "step": 135 }, { "aux_distill/final_loss": 0.0003441147242142506, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06527584453579038, "aux_distill/mean_u": 0.16068252651141998, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 191.625, "aux_distill/step_loss": 0.6524143265560269, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.492408226023448, "calib/avg_num_step_conf": 6.11328125, "calib/ece": 0.41375999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.052, "calib/gap": -0.02164136075341147, "calib/mean_conf": 0.16720000000000002, "calib/mu_c": 0.1560330578512397, "calib/mu_w": 0.17767441860465116, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.048479999999999995, "calib/std_conf": 0.2597524975818327, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.22399700598802394, "calib/step_q_c_n": 668.0, "calib/step_q_gap": -0.004259404268386308, "calib/step_q_w": 0.22825641025641025, "calib/step_q_w_n": 897.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2802.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 577.91015625, "completions/mean_terminated_length": 584.7628784179688, "completions/min_length": 0.0, "completions/min_terminated_length": 205.0, "epoch": 0.14506666666666668, "grad_norm": 0.005741880275309086, "learning_rate": 1.777777777777778e-06, "loss": 0.0699, "num_tokens": 30323475.0, "reward": 1.001725196838379, "reward_std": 0.17823567986488342, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5581378936767578, "rewards/format_reward_step": 0.97265625, "step": 136 }, { "aux_distill/final_loss": 0.0002509895234652504, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07148401136510074, "aux_distill/mean_u": 0.2109595339307451, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 185.75, "aux_distill/step_loss": 0.7145891096442938, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5308645899554991, "calib/avg_num_step_conf": 5.8203125, "calib/ece": 0.3838804780876494, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.05179282868525897, "calib/gap": 0.05437571519389703, "calib/mean_conf": 0.15659760956175298, "calib/mu_c": 0.1847603305785124, "calib/mu_w": 0.13038461538461538, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.029203187250996014, "calib/std_conf": 0.2571923982457517, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.239253893129771, "calib/step_q_c_n": 655.0, "calib/step_q_gap": 0.03216527037528, "calib/step_q_w": 0.207088622754491, "calib/step_q_w_n": 835.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2390.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 574.58984375, "completions/mean_terminated_length": 581.4031982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 256.0, "epoch": 0.14613333333333334, "grad_norm": 0.006100764032453299, "learning_rate": 1.75e-06, "loss": 0.0623, "num_tokens": 30577554.0, "reward": 1.0213937759399414, "reward_std": 0.1669759452342987, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5935689210891724, "rewards/format_reward_step": 0.9765625, "step": 137 }, { "aux_distill/final_loss": 0.0010311530596709417, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06165759603027254, "aux_distill/mean_u": 0.1574107536924648, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 175.75, "aux_distill/step_loss": 0.6155447992496192, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5907687991021325, "calib/avg_num_step_conf": 5.5859375, "calib/ece": 0.5249600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.088, "calib/gap": 0.037878787878787845, "calib/mean_conf": 0.19, "calib/mu_c": 0.2033333333333333, "calib/mu_w": 0.16545454545454547, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.03348, "calib/std_conf": 0.2955848439957638, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2696705882352941, "calib/step_q_c_n": 850.0, "calib/step_q_gap": 0.023498174442190656, "calib/step_q_w": 0.24617241379310345, "calib/step_q_w_n": 580.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 560.2109375, "completions/mean_terminated_length": 566.853759765625, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.1472, "grad_norm": 0.006789561361074448, "learning_rate": 1.7222222222222224e-06, "loss": 0.1078, "num_tokens": 30825304.0, "reward": 1.0351853370666504, "reward_std": 0.20294833183288574, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.47271445393562317, "rewards/format_reward_step": 0.96484375, "step": 138 }, { "aux_distill/final_loss": 0.0005163222972441872, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06195448955986649, "aux_distill/mean_u": 0.14828940232721982, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 169.125, "aux_distill/step_loss": 0.6190285570919514, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.42939988271323387, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.4920634920634919, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07539682539682539, "calib/gap": -0.05801133772072714, "calib/mean_conf": 0.2123015873015873, "calib/mu_c": 0.18859060402684566, "calib/mu_w": 0.2466019417475728, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05654761904761904, "calib/std_conf": 0.29581515758628385, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.25153333333333333, "calib/step_q_c_n": 750.0, "calib/step_q_gap": -0.047710087719298244, "calib/step_q_w": 0.2992434210526316, "calib/step_q_w_n": 608.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2708.0, "completions/max_terminated_length": 2708.0, "completions/mean_length": 526.41015625, "completions/mean_terminated_length": 530.5551147460938, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.14826666666666666, "grad_norm": 0.0061951130628585815, "learning_rate": 1.6944444444444446e-06, "loss": 0.0623, "num_tokens": 31063161.0, "reward": 1.028887152671814, "reward_std": 0.17283034324645996, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.49136799573898315, "rewards/format_reward_step": 0.984375, "step": 139 }, { "aux_distill/final_loss": 0.0002839611361196148, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06980497867334634, "aux_distill/mean_u": 0.18654986838888957, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 174.875, "aux_distill/step_loss": 0.6977658206596971, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5171219187208528, "calib/avg_num_step_conf": 5.63671875, "calib/ece": 0.4897628458498023, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07509881422924901, "calib/gap": 0.015161892071952038, "calib/mean_conf": 0.20620553359683796, "calib/mu_c": 0.21189873417721522, "calib/mu_w": 0.19673684210526318, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.035731225296442695, "calib/std_conf": 0.29114573529225096, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29182142857142856, "calib/step_q_c_n": 840.0, "calib/step_q_gap": 0.044039670694148325, "calib/step_q_w": 0.24778175787728024, "calib/step_q_w_n": 603.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2131.0, "completions/max_terminated_length": 2131.0, "completions/mean_length": 536.96484375, "completions/mean_terminated_length": 541.1929321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.14933333333333335, "grad_norm": 0.006557528395205736, "learning_rate": 1.6666666666666667e-06, "loss": 0.0498, "num_tokens": 31305640.0, "reward": 1.052268385887146, "reward_std": 0.185662180185318, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.5029742121696472, "rewards/format_reward_step": 0.984375, "step": 140 }, { "aux_distill/final_loss": 0.0001985268124826689, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06410034786676988, "aux_distill/mean_u": 0.1784564167208868, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 160.625, "aux_distill/step_loss": 0.6408049371093512, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5053041018387552, "calib/avg_num_step_conf": 5.02734375, "calib/ece": 0.45168627450980386, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.09411764705882353, "calib/gap": 0.01754854056834257, "calib/mean_conf": 0.2396078431372549, "calib/mu_c": 0.24655844155844156, "calib/mu_w": 0.229009900990099, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04368627450980392, "calib/std_conf": 0.32268336197478165, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.29642659279778394, "calib/step_q_c_n": 722.0, "calib/step_q_gap": 0.04403721226681051, "calib/step_q_w": 0.25238938053097343, "calib/step_q_w_n": 565.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1320.0, "completions/max_terminated_length": 1320.0, "completions/mean_length": 540.13671875, "completions/mean_terminated_length": 542.2549438476562, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.1504, "grad_norm": 0.006194172892719507, "learning_rate": 1.638888888888889e-06, "loss": 0.0842, "num_tokens": 31551011.0, "reward": 1.0600550174713135, "reward_std": 0.19128084182739258, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.5263601541519165, "rewards/format_reward_step": 0.9921875, "step": 141 }, { "aux_distill/final_loss": 0.00028447176589452283, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07187848904868588, "aux_distill/mean_u": 0.21142897708295655, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 178.25, "aux_distill/step_loss": 0.7185004046186805, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5234375, "calib/avg_num_step_conf": 5.5703125, "calib/ece": 0.38613557312252966, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.09486166007905138, "calib/gap": 0.03321953124999996, "calib/mean_conf": 0.22200671936758895, "calib/mu_c": 0.23841953124999996, "calib/mu_w": 0.2052, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.05110671936758893, "calib/std_conf": 0.30402223549959345, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.28538184663536775, "calib/step_q_c_n": 639.0, "calib/step_q_gap": 0.02116329517412252, "calib/step_q_w": 0.26421855146124523, "calib/step_q_w_n": 787.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1899.0, "completions/max_terminated_length": 1899.0, "completions/mean_length": 543.82421875, "completions/mean_terminated_length": 545.9569091796875, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.15146666666666667, "grad_norm": 0.006887676659971476, "learning_rate": 1.6111111111111113e-06, "loss": 0.0971, "num_tokens": 31795390.0, "reward": 1.0296494960784912, "reward_std": 0.18625324964523315, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5788301229476929, "rewards/format_reward_step": 0.9765625, "step": 142 }, { "aux_distill/final_loss": 0.0002852490885061343, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0663798114983365, "aux_distill/mean_u": 0.18406928543141352, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 183.625, "aux_distill/step_loss": 0.6635128539055586, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.45027642276422775, "calib/avg_num_step_conf": 6.20703125, "calib/ece": 0.4281854838709678, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.07661290322580645, "calib/gap": -0.04782504065040649, "calib/mean_conf": 0.18004032258064515, "calib/mu_c": 0.15593495934959353, "calib/mu_w": 0.20376000000000002, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05612903225806452, "calib/std_conf": 0.27363517327183806, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.20450949367088608, "calib/step_q_c_n": 632.0, "calib/step_q_gap": -0.08326480099996028, "calib/step_q_w": 0.28777429467084636, "calib/step_q_w_n": 957.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 576.08984375, "completions/mean_terminated_length": 585.234130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.15253333333333333, "grad_norm": 0.006010540761053562, "learning_rate": 1.5833333333333333e-06, "loss": 0.0568, "num_tokens": 32050205.0, "reward": 0.9878466725349426, "reward_std": 0.1761135756969452, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5303808450698853, "rewards/format_reward_step": 0.96484375, "step": 143 }, { "aux_distill/final_loss": 0.00022862903210807417, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07271759235300124, "aux_distill/mean_u": 0.18098461256417697, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 162.25, "aux_distill/step_loss": 0.7269472843036056, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5503400013203934, "calib/avg_num_step_conf": 5.0703125, "calib/ece": 0.4511416666666668, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.06746031746031746, "calib/gap": 0.05464961378490785, "calib/mean_conf": 0.21560436507936506, "calib/mu_c": 0.23707385620915034, "calib/mu_w": 0.1824242424242425, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.029801587301587294, "calib/std_conf": 0.28706309967537064, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.3026603888213852, "calib/step_q_c_n": 823.0, "calib/step_q_gap": 0.045081441452964166, "calib/step_q_w": 0.257578947368421, "calib/step_q_w_n": 475.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 541.40234375, "completions/mean_terminated_length": 543.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 206.0, "epoch": 0.1536, "grad_norm": 0.006602726876735687, "learning_rate": 1.5555555555555558e-06, "loss": 0.105, "num_tokens": 32292932.0, "reward": 1.0560318231582642, "reward_std": 0.1847558617591858, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.5378449559211731, "rewards/format_reward_step": 0.9765625, "step": 144 }, { "aux_distill/final_loss": 0.00019312693302708794, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06420326471561566, "aux_distill/mean_u": 0.17593388530767018, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 180.375, "aux_distill/step_loss": 0.6418395061045885, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5214498510427011, "calib/avg_num_step_conf": 5.75, "calib/ece": 0.45962992125984253, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.06299212598425197, "calib/gap": 0.03884342932803714, "calib/mean_conf": 0.22115748031496063, "calib/mu_c": 0.23568553459119498, "calib/mu_w": 0.19684210526315785, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.027401574803149607, "calib/std_conf": 0.28398247492775347, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.28976551724137933, "calib/step_q_c_n": 870.0, "calib/step_q_gap": 0.04845322488257528, "calib/step_q_w": 0.24131229235880405, "calib/step_q_w_n": 602.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2605.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 518.765625, "completions/mean_terminated_length": 520.800048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.15466666666666667, "grad_norm": 0.007395788095891476, "learning_rate": 1.527777777777778e-06, "loss": 0.0727, "num_tokens": 32528440.0, "reward": 1.0674223899841309, "reward_std": 0.18406549096107483, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.5293761491775513, "rewards/format_reward_step": 0.984375, "step": 145 }, { "aux_distill/final_loss": 0.008665551134754423, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07391537714283913, "aux_distill/mean_u": 0.16548633324082324, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 161.375, "aux_distill/step_loss": 0.7304882053285837, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5897919978802332, "calib/avg_num_step_conf": 5.1484375, "calib/ece": 0.284732, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.072, "calib/gap": 0.09475397456279805, "calib/mean_conf": 0.20478799999999997, "calib/mu_c": 0.26088235294117645, "calib/mu_w": 0.1661283783783784, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.04075999999999999, "calib/std_conf": 0.2854098650292242, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.27421766990291263, "calib/step_q_c_n": 515.0, "calib/step_q_gap": 0.07378803104861625, "calib/step_q_w": 0.20042963885429638, "calib/step_q_w_n": 803.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2811.0, "completions/max_terminated_length": 2811.0, "completions/mean_length": 539.015625, "completions/mean_terminated_length": 543.2598266601562, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.15573333333333333, "grad_norm": 0.006177644710987806, "learning_rate": 1.5e-06, "loss": 0.1055, "num_tokens": 32773644.0, "reward": 1.012442946434021, "reward_std": 0.18265041708946228, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6576983332633972, "rewards/format_reward_step": 0.96875, "step": 146 }, { "aux_distill/final_loss": 0.0001339657533208083, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06743633723817766, "aux_distill/mean_u": 0.17703730969472667, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 162.625, "aux_distill/step_loss": 0.6742293937131763, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.536065786065786, "calib/avg_num_step_conf": 5.08203125, "calib/ece": 0.37140562248995973, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.09236947791164658, "calib/gap": 0.018074980574980498, "calib/mean_conf": 0.24947791164658634, "calib/mu_c": 0.259059829059829, "calib/mu_w": 0.2409848484848485, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.07550200803212852, "calib/std_conf": 0.31741951595719226, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2965528455284553, "calib/step_q_c_n": 615.0, "calib/step_q_gap": -0.009103131147929544, "calib/step_q_w": 0.30565597667638483, "calib/step_q_w_n": 686.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3039.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 532.34765625, "completions/mean_terminated_length": 538.6600952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.1568, "grad_norm": 0.006623013410717249, "learning_rate": 1.4722222222222225e-06, "loss": 0.0796, "num_tokens": 33013605.0, "reward": 1.0117859840393066, "reward_std": 0.19896234571933746, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5938844084739685, "rewards/format_reward_step": 0.97265625, "step": 147 }, { "aux_distill/final_loss": 0.00013361726644234295, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0728413398610428, "aux_distill/mean_u": 0.21564378818813051, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 165.5, "aux_distill/step_loss": 0.728279777802527, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5446356275303643, "calib/avg_num_step_conf": 5.21875, "calib/ece": 0.45294820717131484, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05976095617529881, "calib/gap": 0.056703778677462846, "calib/mean_conf": 0.21629482071713146, "calib/mu_c": 0.23775641025641028, "calib/mu_w": 0.18105263157894744, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.023864541832669325, "calib/std_conf": 0.2871709388857196, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2900251256281407, "calib/step_q_c_n": 796.0, "calib/step_q_gap": 0.03487697747999258, "calib/step_q_w": 0.2551481481481481, "calib/step_q_w_n": 540.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 496.66796875, "completions/mean_terminated_length": 502.5573425292969, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.15786666666666666, "grad_norm": 0.0063850851729512215, "learning_rate": 1.4444444444444445e-06, "loss": 0.084, "num_tokens": 33245864.0, "reward": 1.0619884729385376, "reward_std": 0.1776794195175171, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.5341331958770752, "rewards/format_reward_step": 0.98046875, "step": 148 }, { "aux_distill/final_loss": 0.00015465507885892293, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07203872106038034, "aux_distill/mean_u": 0.18082896538288876, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 169.5, "aux_distill/step_loss": 0.7202325398102403, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48552766393442626, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.4096, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.088, "calib/gap": -0.012695952868852445, "calib/mean_conf": 0.23232, "calib/mu_c": 0.22581967213114754, "calib/mu_w": 0.23851562499999998, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07696000000000001, "calib/std_conf": 0.31102981464804946, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2796116504854369, "calib/step_q_c_n": 618.0, "calib/step_q_gap": -0.025774836001049573, "calib/step_q_w": 0.3053864864864865, "calib/step_q_w_n": 740.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2807.0, "completions/max_terminated_length": 2807.0, "completions/mean_length": 536.171875, "completions/mean_terminated_length": 542.5296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.15893333333333334, "grad_norm": 0.005627046804875135, "learning_rate": 1.4166666666666667e-06, "loss": 0.0962, "num_tokens": 33487580.0, "reward": 1.0105897188186646, "reward_std": 0.21034207940101624, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5680545568466187, "rewards/format_reward_step": 0.9765625, "step": 149 }, { "aux_distill/final_loss": 0.00021909270560627192, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07247080851811916, "aux_distill/mean_u": 0.2084789292080666, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 158.875, "aux_distill/step_loss": 0.7244889652356505, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5141650422352176, "calib/avg_num_step_conf": 5.03125, "calib/ece": 0.4372690763052209, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.07228915662650602, "calib/gap": -0.008651072124756326, "calib/mean_conf": 0.19759036144578312, "calib/mu_c": 0.19362962962962962, "calib/mu_w": 0.20228070175438595, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.046345381526104415, "calib/std_conf": 0.28539941867528507, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.2518639053254438, "calib/step_q_c_n": 676.0, "calib/step_q_gap": 0.00945197722086863, "calib/step_q_w": 0.24241192810457515, "calib/step_q_w_n": 612.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 484.25390625, "completions/mean_terminated_length": 488.0669250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.16, "grad_norm": 0.006086606997996569, "learning_rate": 1.3888888888888892e-06, "loss": 0.1095, "num_tokens": 33716509.0, "reward": 1.0141351222991943, "reward_std": 0.1790996640920639, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5321765542030334, "rewards/format_reward_step": 0.96875, "step": 150 }, { "aux_distill/final_loss": 0.000140511143627009, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07645413489080966, "aux_distill/mean_u": 0.22830244143183132, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 152.75, "aux_distill/step_loss": 0.7644008286297321, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5012140131807146, "calib/avg_num_step_conf": 4.828125, "calib/ece": 0.3078584677419355, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.04838709677419355, "calib/gap": -0.007641505376344143, "calib/mean_conf": 0.2039157258064516, "calib/mu_c": 0.19913978494623655, "calib/mu_w": 0.2067812903225807, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.06838709677419355, "calib/std_conf": 0.2666086343424099, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.25983981693363845, "calib/step_q_c_n": 437.0, "calib/step_q_gap": 0.008340317559420662, "calib/step_q_w": 0.2514994993742178, "calib/step_q_w_n": 799.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 553.6640625, "completions/mean_terminated_length": 558.0236206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.16106666666666666, "grad_norm": 0.006590255536139011, "learning_rate": 1.3611111111111112e-06, "loss": 0.0612, "num_tokens": 33965271.0, "reward": 0.9828296303749084, "reward_std": 0.17969170212745667, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6375342607498169, "rewards/format_reward_step": 0.96484375, "step": 151 }, { "aux_distill/final_loss": 0.010517073303617508, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.08004386606626213, "aux_distill/mean_u": 0.1953560004997596, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 181.375, "aux_distill/step_loss": 0.7899215742945671, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5205649000317358, "calib/avg_num_step_conf": 5.76953125, "calib/ece": 0.3487797619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.06746031746031746, "calib/gap": 0.02912456363059346, "calib/mean_conf": 0.20860119047619047, "calib/mu_c": 0.22443478260869565, "calib/mu_w": 0.1953102189781022, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.050515873015873014, "calib/std_conf": 0.28373842308837555, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3100977653631285, "calib/step_q_c_n": 716.0, "calib/step_q_gap": 0.028001181920290108, "calib/step_q_w": 0.2820965834428384, "calib/step_q_w_n": 761.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2882.0, "completions/max_terminated_length": 2882.0, "completions/mean_length": 510.4609375, "completions/mean_terminated_length": 514.4802856445312, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.16213333333333332, "grad_norm": 0.006801180075854063, "learning_rate": 1.3333333333333334e-06, "loss": 0.0875, "num_tokens": 34201341.0, "reward": 1.012510061264038, "reward_std": 0.20159053802490234, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6031451225280762, "rewards/format_reward_step": 0.97265625, "step": 152 }, { "aux_distill/final_loss": 0.0005106443512659098, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0817980298306793, "aux_distill/mean_u": 0.26963061827811996, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 157.125, "aux_distill/step_loss": 0.8174696424975991, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5102924890886587, "calib/avg_num_step_conf": 5.2578125, "calib/ece": 0.414717741935484, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.07661290322580645, "calib/gap": 0.009611751677415153, "calib/mean_conf": 0.20600806451612905, "calib/mu_c": 0.21062015503875967, "calib/mu_w": 0.2010084033613445, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05028225806451612, "calib/std_conf": 0.297691033704186, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.273472, "calib/step_q_c_n": 625.0, "calib/step_q_gap": 0.02140542579750343, "calib/step_q_w": 0.25206657420249656, "calib/step_q_w_n": 721.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2564.0, "completions/max_terminated_length": 2564.0, "completions/mean_length": 496.21875, "completions/mean_terminated_length": 506.1036071777344, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.1632, "grad_norm": 0.007670701947063208, "learning_rate": 1.3055555555555556e-06, "loss": 0.0914, "num_tokens": 34435693.0, "reward": 1.0133540630340576, "reward_std": 0.19515293836593628, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.55014568567276, "rewards/format_reward_step": 0.96875, "step": 153 }, { "aux_distill/final_loss": 0.00012748148060381936, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07031724287662655, "aux_distill/mean_u": 0.1923050521109305, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 159.875, "aux_distill/step_loss": 0.7030449416488409, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4036673553719008, "calib/avg_num_step_conf": 4.99609375, "calib/ece": 0.4186345381526104, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.10843373493975904, "calib/gap": -0.07913094008264462, "calib/mean_conf": 0.245140562248996, "calib/mu_c": 0.20446280991735535, "calib/mu_w": 0.28359375, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08891566265060241, "calib/std_conf": 0.3174093177922958, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2714490161001789, "calib/step_q_c_n": 559.0, "calib/step_q_gap": -0.036828761677598854, "calib/step_q_w": 0.30827777777777776, "calib/step_q_w_n": 720.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2829.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 495.26953125, "completions/mean_terminated_length": 501.1423034667969, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.16426666666666667, "grad_norm": 0.0063225575722754, "learning_rate": 1.2777777777777779e-06, "loss": 0.0669, "num_tokens": 34666922.0, "reward": 0.9910746812820435, "reward_std": 0.2115459442138672, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5368367433547974, "rewards/format_reward_step": 0.97265625, "step": 154 }, { "aux_distill/final_loss": 0.00013211382838562713, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07154768169857562, "aux_distill/mean_u": 0.19281167140165867, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 172.75, "aux_distill/step_loss": 0.715344687923789, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5510559006211181, "calib/avg_num_step_conf": 5.3984375, "calib/ece": 0.33588235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03529411764705882, "calib/gap": 0.051487577639751536, "calib/mean_conf": 0.19086274509803922, "calib/mu_c": 0.21913043478260869, "calib/mu_w": 0.16764285714285715, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.037882352941176464, "calib/std_conf": 0.2686554042991339, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2655172413793103, "calib/step_q_c_n": 609.0, "calib/step_q_gap": 0.05232189855912919, "calib/step_q_w": 0.21319534282018113, "calib/step_q_w_n": 773.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1192.0, "completions/max_terminated_length": 1192.0, "completions/mean_length": 466.47265625, "completions/mean_terminated_length": 468.302001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.16533333333333333, "grad_norm": 0.007305369712412357, "learning_rate": 1.25e-06, "loss": 0.0993, "num_tokens": 34893555.0, "reward": 1.0365357398986816, "reward_std": 0.1448049545288086, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6316652297973633, "rewards/format_reward_step": 0.9921875, "step": 155 }, { "aux_distill/final_loss": 0.00015378354362383106, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07364484970457852, "aux_distill/mean_u": 0.19237760412891566, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 172.375, "aux_distill/step_loss": 0.7362947026267648, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4962834645669292, "calib/avg_num_step_conf": 5.45703125, "calib/ece": 0.4131349206349206, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05952380952380952, "calib/gap": -0.019175433070866194, "calib/mean_conf": 0.22297619047619047, "calib/mu_c": 0.21346456692913385, "calib/mu_w": 0.23264000000000004, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06607142857142856, "calib/std_conf": 0.27947845316287434, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.27607246376811595, "calib/step_q_c_n": 690.0, "calib/step_q_gap": -0.01774677244121925, "calib/step_q_w": 0.2938192362093352, "calib/step_q_w_n": 707.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2371.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 489.56640625, "completions/mean_terminated_length": 493.4212646484375, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.1664, "grad_norm": 0.005915569607168436, "learning_rate": 1.2222222222222223e-06, "loss": 0.0569, "num_tokens": 35123644.0, "reward": 1.0273587703704834, "reward_std": 0.17374806106090546, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5742487907409668, "rewards/format_reward_step": 0.984375, "step": 156 }, { "aux_distill/final_loss": 0.00880513777633496, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07628553418908268, "aux_distill/mean_u": 0.2348693963470195, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 182.75, "aux_distill/step_loss": 0.7540501952171326, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4681208053691275, "calib/avg_num_step_conf": 5.85546875, "calib/ece": 0.42094117647058826, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": -0.002948588071419511, "calib/mean_conf": 0.22384313725490196, "calib/mu_c": 0.22261744966442953, "calib/mu_w": 0.22556603773584905, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03023529411764707, "calib/std_conf": 0.27768958353860507, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.3245949074074074, "calib/step_q_c_n": 864.0, "calib/step_q_gap": 0.08065789953339167, "calib/step_q_w": 0.2439370078740157, "calib/step_q_w_n": 635.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1144.0, "completions/max_terminated_length": 1144.0, "completions/mean_length": 468.7109375, "completions/mean_terminated_length": 470.5490417480469, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.16746666666666668, "grad_norm": 0.006663069594651461, "learning_rate": 1.1944444444444446e-06, "loss": 0.0444, "num_tokens": 35347362.0, "reward": 1.062303900718689, "reward_std": 0.16383245587348938, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.5464828014373779, "rewards/format_reward_step": 0.99609375, "step": 157 }, { "aux_distill/final_loss": 0.005067012140671068, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07419109647162259, "aux_distill/mean_u": 0.20995247473327852, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 169.25, "aux_distill/step_loss": 0.7368439408019185, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4461863604819885, "calib/avg_num_step_conf": 5.2890625, "calib/ece": 0.42595238095238097, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": -0.05374550501545641, "calib/mean_conf": 0.21412698412698414, "calib/mu_c": 0.1883206106870229, "calib/mu_w": 0.24206611570247932, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.060119047619047614, "calib/std_conf": 0.29138086634481836, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.25353722627737224, "calib/step_q_c_n": 685.0, "calib/step_q_gap": -0.06499790077793416, "calib/step_q_w": 0.3185351270553064, "calib/step_q_w_n": 669.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1386.0, "completions/max_terminated_length": 1386.0, "completions/mean_length": 460.38671875, "completions/mean_terminated_length": 464.0118103027344, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.16853333333333334, "grad_norm": 0.006863833405077457, "learning_rate": 1.1666666666666668e-06, "loss": 0.0773, "num_tokens": 35570461.0, "reward": 1.0202934741973877, "reward_std": 0.174790158867836, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5366804599761963, "rewards/format_reward_step": 0.984375, "step": 158 }, { "aux_distill/final_loss": 7.75144445128717e-05, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07439770107157528, "aux_distill/mean_u": 0.197982069789905, "aux_distill/n_active_final_tok": 31.0, "aux_distill/n_active_tok": 153.875, "aux_distill/step_loss": 0.7438994897529483, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5158219623131903, "calib/avg_num_step_conf": 4.81640625, "calib/ece": 0.3983132530120482, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.06827309236947791, "calib/gap": 0.048635477582845976, "calib/mean_conf": 0.19847389558232933, "calib/mu_c": 0.2207407407407407, "calib/mu_w": 0.17210526315789473, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02730923694779116, "calib/std_conf": 0.27711088410534096, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.32632558139534884, "calib/step_q_c_n": 645.0, "calib/step_q_gap": 0.0776861256130359, "calib/step_q_w": 0.24863945578231295, "calib/step_q_w_n": 588.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1435.0, "completions/max_terminated_length": 1435.0, "completions/mean_length": 436.8515625, "completions/mean_terminated_length": 445.5538024902344, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.1696, "grad_norm": 0.006662644911557436, "learning_rate": 1.138888888888889e-06, "loss": 0.0472, "num_tokens": 35787079.0, "reward": 1.028663158416748, "reward_std": 0.1973605453968048, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5612324476242065, "rewards/format_reward_step": 0.96875, "step": 159 }, { "aux_distill/final_loss": 0.0059038225367658015, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07914891897235066, "aux_distill/mean_u": 0.1979178449540299, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 157.25, "aux_distill/step_loss": 0.7855853512883186, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5008529727680546, "calib/avg_num_step_conf": 5.14453125, "calib/ece": 0.37162698412698403, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": 0.0014285714285714457, "calib/mean_conf": 0.23496031746031745, "calib/mu_c": 0.23571428571428574, "calib/mu_w": 0.2342857142857143, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06718253968253968, "calib/std_conf": 0.2983173685750631, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.309696394686907, "calib/step_q_c_n": 527.0, "calib/step_q_gap": 0.03393310354766649, "calib/step_q_w": 0.27576329113924053, "calib/step_q_w_n": 790.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2871.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 462.08984375, "completions/mean_terminated_length": 465.72833251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.17066666666666666, "grad_norm": 0.006196652539074421, "learning_rate": 1.111111111111111e-06, "loss": 0.0816, "num_tokens": 36010214.0, "reward": 1.0190675258636475, "reward_std": 0.20500755310058594, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5928226709365845, "rewards/format_reward_step": 0.98046875, "step": 160 }, { "aux_distill/final_loss": 0.00010458140604896471, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06749991769902408, "aux_distill/mean_u": 0.17813880146622404, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 156.625, "aux_distill/step_loss": 0.6748945843428373, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5069444444444444, "calib/avg_num_step_conf": 4.89453125, "calib/ece": 0.48333333333333345, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.058823529411764705, "calib/gap": 0.002220853858784927, "calib/mean_conf": 0.22364705882352942, "calib/mu_c": 0.2244047619047619, "calib/mu_w": 0.22218390804597699, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.024078431372549027, "calib/std_conf": 0.2661556984636677, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2867741935483871, "calib/step_q_c_n": 775.0, "calib/step_q_gap": -0.028267647455796963, "calib/step_q_w": 0.3150418410041841, "calib/step_q_w_n": 478.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2907.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 461.68359375, "completions/mean_terminated_length": 461.68359375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.17173333333333332, "grad_norm": 0.007173808291554451, "learning_rate": 1.0833333333333335e-06, "loss": 0.1101, "num_tokens": 36232325.0, "reward": 1.0831670761108398, "reward_std": 0.15231254696846008, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.5139902234077454, "rewards/format_reward_step": 0.99609375, "step": 161 }, { "aux_distill/final_loss": 0.0001530974079742009, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06709114729892462, "aux_distill/mean_u": 0.18809368684457695, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 164.25, "aux_distill/step_loss": 0.6707583647221327, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5302566440854611, "calib/avg_num_step_conf": 5.1328125, "calib/ece": 0.4387632411067194, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05533596837944664, "calib/gap": 0.03453030224075038, "calib/mean_conf": 0.21483359683794467, "calib/mu_c": 0.22861842105263158, "calib/mu_w": 0.1940881188118812, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.026403162055335966, "calib/std_conf": 0.2695710060266586, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29939058171745153, "calib/step_q_c_n": 722.0, "calib/step_q_gap": 0.024668960095829917, "calib/step_q_w": 0.2747216216216216, "calib/step_q_w_n": 592.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2698.0, "completions/max_terminated_length": 2698.0, "completions/mean_length": 460.08203125, "completions/mean_terminated_length": 461.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.1728, "grad_norm": 0.006731119938194752, "learning_rate": 1.0555555555555557e-06, "loss": 0.1222, "num_tokens": 36454250.0, "reward": 1.061426043510437, "reward_std": 0.1838788390159607, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.544727087020874, "rewards/format_reward_step": 0.984375, "step": 162 }, { "aux_distill/final_loss": 0.0001396006812228734, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07542417175136507, "aux_distill/mean_u": 0.22200943521140018, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 164.0, "aux_distill/step_loss": 0.7541021099314094, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5280830280830281, "calib/avg_num_step_conf": 5.23046875, "calib/ece": 0.4085516, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.064, "calib/gap": 0.03042482488271958, "calib/mean_conf": 0.20064839999999998, "calib/mu_c": 0.21488721804511277, "calib/mu_w": 0.18446239316239318, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.038599999999999995, "calib/std_conf": 0.2834424195801327, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2612359550561798, "calib/step_q_c_n": 623.0, "calib/step_q_gap": 0.005341541648358539, "calib/step_q_w": 0.25589441340782126, "calib/step_q_w_n": 716.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 472.203125, "completions/mean_terminated_length": 481.6095886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.17386666666666667, "grad_norm": 0.006165460217744112, "learning_rate": 1.0277777777777777e-06, "loss": 0.0517, "num_tokens": 36679966.0, "reward": 1.0293166637420654, "reward_std": 0.18782052397727966, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5625396966934204, "rewards/format_reward_step": 0.9765625, "step": 163 }, { "aux_distill/final_loss": 0.00014845711132238648, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0759313030866906, "aux_distill/mean_u": 0.18177047881912312, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 154.875, "aux_distill/step_loss": 0.7591645568609238, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48111811631497686, "calib/avg_num_step_conf": 4.84765625, "calib/ece": 0.4277200000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.084, "calib/gap": -0.015193000514668015, "calib/mean_conf": 0.22996, "calib/mu_c": 0.22291044776119404, "calib/mu_w": 0.23810344827586205, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.060840000000000005, "calib/std_conf": 0.3054629247551984, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2821812596006144, "calib/step_q_c_n": 651.0, "calib/step_q_gap": -0.024954842094300822, "calib/step_q_w": 0.30713610169491523, "calib/step_q_w_n": 590.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3021.0, "completions/max_terminated_length": 3021.0, "completions/mean_length": 516.4765625, "completions/mean_terminated_length": 518.5020141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.17493333333333333, "grad_norm": 0.0059613315388560295, "learning_rate": 1.0000000000000002e-06, "loss": 0.1579, "num_tokens": 36918320.0, "reward": 1.0218608379364014, "reward_std": 0.19381961226463318, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5437214374542236, "rewards/format_reward_step": 0.9765625, "step": 164 }, { "aux_distill/final_loss": 0.00011706302370839694, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.081173887825571, "aux_distill/mean_u": 0.2349289558838646, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 158.125, "aux_distill/step_loss": 0.8116217972710729, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4514854614412136, "calib/avg_num_step_conf": 4.94140625, "calib/ece": 0.34383399209486165, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.04743083003952569, "calib/gap": -0.007164348925410846, "calib/mean_conf": 0.1923715415019763, "calib/mu_c": 0.18840707964601772, "calib/mu_w": 0.19557142857142856, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04478260869565218, "calib/std_conf": 0.25780448113727916, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23833005893909626, "calib/step_q_c_n": 509.0, "calib/step_q_gap": 0.0017824398914772221, "calib/step_q_w": 0.23654761904761903, "calib/step_q_w_n": 756.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1477.0, "completions/max_terminated_length": 1477.0, "completions/mean_length": 480.9453125, "completions/mean_terminated_length": 484.7322692871094, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.176, "grad_norm": 0.007403014227747917, "learning_rate": 9.722222222222224e-07, "loss": 0.0772, "num_tokens": 37147018.0, "reward": 1.0222697257995605, "reward_std": 0.14842969179153442, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6109457015991211, "rewards/format_reward_step": 0.98828125, "step": 165 }, { "aux_distill/final_loss": 0.00015963665589424636, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07058893702924252, "aux_distill/mean_u": 0.21124927093897258, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 154.875, "aux_distill/step_loss": 0.7057297229766846, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48443930041152267, "calib/avg_num_step_conf": 4.84375, "calib/ece": 0.4321428571428571, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.004398148148148179, "calib/mean_conf": 0.21547619047619046, "calib/mu_c": 0.21736111111111114, "calib/mu_w": 0.21296296296296297, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0380952380952381, "calib/std_conf": 0.2738586912377256, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.26814132104454685, "calib/step_q_c_n": 651.0, "calib/step_q_gap": -0.014829816476675584, "calib/step_q_w": 0.28297113752122244, "calib/step_q_w_n": 589.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 530.421875, "completions/mean_terminated_length": 532.5020141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.17706666666666668, "grad_norm": 0.0061018322594463825, "learning_rate": 9.444444444444445e-07, "loss": 0.0774, "num_tokens": 37388990.0, "reward": 1.0370354652404785, "reward_std": 0.20210300385951996, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.5389144420623779, "rewards/format_reward_step": 0.97265625, "step": 166 }, { "aux_distill/final_loss": 0.00012661207426845067, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07728934357874095, "aux_distill/mean_u": 0.26433023776783365, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 151.375, "aux_distill/step_loss": 0.7727668080478907, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5267295597484277, "calib/avg_num_step_conf": 4.73046875, "calib/ece": 0.510395256916996, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.039525691699604744, "calib/gap": -0.006399705606851325, "calib/mean_conf": 0.16778656126482214, "calib/mu_c": 0.16540880503144656, "calib/mu_w": 0.17180851063829788, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.024861660079051384, "calib/std_conf": 0.24258457206098058, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.23089552238805972, "calib/step_q_c_n": 737.0, "calib/step_q_gap": 0.004608602556836094, "calib/step_q_w": 0.22628691983122362, "calib/step_q_w_n": 474.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2733.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 477.91015625, "completions/mean_terminated_length": 481.6732177734375, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.17813333333333334, "grad_norm": 0.006885558366775513, "learning_rate": 9.166666666666666e-07, "loss": 0.0723, "num_tokens": 37616943.0, "reward": 1.0405546426773071, "reward_std": 0.15715563297271729, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.4834531545639038, "rewards/format_reward_step": 0.9765625, "step": 167 }, { "aux_distill/final_loss": 0.013254495221190155, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07616680616047233, "aux_distill/mean_u": 0.25441551732470813, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 149.875, "aux_distill/step_loss": 0.7484135506674647, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5167754247722236, "calib/avg_num_step_conf": 4.68359375, "calib/ece": 0.39211764705882346, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.004357916769268638, "calib/mean_conf": 0.20074509803921567, "calib/mu_c": 0.20298387096774193, "calib/mu_w": 0.1986259541984733, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.053294117647058825, "calib/std_conf": 0.2808834691830128, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.22377016129032257, "calib/step_q_c_n": 496.0, "calib/step_q_gap": -0.02783723558023221, "calib/step_q_w": 0.2516073968705548, "calib/step_q_w_n": 703.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2260.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 536.64453125, "completions/mean_terminated_length": 536.64453125, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.1792, "grad_norm": 0.005739326123148203, "learning_rate": 8.88888888888889e-07, "loss": 0.1019, "num_tokens": 37858996.0, "reward": 1.0350497961044312, "reward_std": 0.155503511428833, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5896308422088623, "rewards/format_reward_step": 0.99609375, "step": 168 }, { "aux_distill/final_loss": 0.00016585832759119512, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07224566210061312, "aux_distill/mean_u": 0.1620567446785026, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 137.75, "aux_distill/step_loss": 0.7222907477989793, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47317228226319136, "calib/avg_num_step_conf": 4.3046875, "calib/ece": 0.39422310756972107, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.06772908366533864, "calib/gap": 0.009647806738715847, "calib/mean_conf": 0.21119521912350597, "calib/mu_c": 0.21584615384615385, "calib/mu_w": 0.206198347107438, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04374501992031873, "calib/std_conf": 0.2894605604789032, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.30069444444444443, "calib/step_q_c_n": 576.0, "calib/step_q_gap": 0.029686839881706784, "calib/step_q_w": 0.27100760456273765, "calib/step_q_w_n": 526.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2795.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 488.0703125, "completions/mean_terminated_length": 491.91339111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.18026666666666666, "grad_norm": 0.006307313684374094, "learning_rate": 8.611111111111112e-07, "loss": 0.1095, "num_tokens": 38088126.0, "reward": 1.0232303142547607, "reward_std": 0.18713515996932983, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5620855093002319, "rewards/format_reward_step": 0.9765625, "step": 169 }, { "aux_distill/final_loss": 0.0001271916736413914, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07005494262557477, "aux_distill/mean_u": 0.1803852533446794, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 146.875, "aux_distill/step_loss": 0.7004222283139825, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4994199162101193, "calib/avg_num_step_conf": 4.58984375, "calib/ece": 0.4657142857142857, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": -0.02286045762165645, "calib/mean_conf": 0.19460317460317464, "calib/mu_c": 0.18489655172413794, "calib/mu_w": 0.20775700934579439, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.042460317460317455, "calib/std_conf": 0.2587658462095125, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.25309985096870347, "calib/step_q_c_n": 671.0, "calib/step_q_gap": -0.04344776807891554, "calib/step_q_w": 0.296547619047619, "calib/step_q_w_n": 504.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2506.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 522.46484375, "completions/mean_terminated_length": 524.5137329101562, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.18133333333333335, "grad_norm": 0.005817190278321505, "learning_rate": 8.333333333333333e-07, "loss": 0.0844, "num_tokens": 38326029.0, "reward": 1.0375053882598877, "reward_std": 0.16187521815299988, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.5242296457290649, "rewards/format_reward_step": 0.984375, "step": 170 }, { "aux_distill/final_loss": 0.00012730757214285404, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0673176534473896, "aux_distill/mean_u": 0.1913334194705908, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 144.5, "aux_distill/step_loss": 0.6730492170900106, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5233848720087253, "calib/avg_num_step_conf": 4.6640625, "calib/ece": 0.3166269841269841, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.06349206349206349, "calib/gap": 0.024855969718355048, "calib/mean_conf": 0.21543650793650795, "calib/mu_c": 0.22954128440366972, "calib/mu_w": 0.20468531468531467, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04976190476190476, "calib/std_conf": 0.2760337854876874, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.2694093686354379, "calib/step_q_c_n": 491.0, "calib/step_q_gap": 0.01905374985876651, "calib/step_q_w": 0.2503556187766714, "calib/step_q_w_n": 703.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1202.0, "completions/max_terminated_length": 1202.0, "completions/mean_length": 474.3046875, "completions/mean_terminated_length": 479.9288635253906, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.1824, "grad_norm": 0.007274095434695482, "learning_rate": 8.055555555555557e-07, "loss": 0.026, "num_tokens": 38554347.0, "reward": 1.0217634439468384, "reward_std": 0.17246311902999878, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6333706974983215, "rewards/format_reward_step": 0.984375, "step": 171 }, { "aux_distill/final_loss": 0.00013447724222714896, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06745812157168984, "aux_distill/mean_u": 0.16089452147621752, "aux_distill/n_active_final_tok": 31.875, "aux_distill/n_active_tok": 137.375, "aux_distill/step_loss": 0.6744467271491885, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.494122965641953, "calib/avg_num_step_conf": 4.29296875, "calib/ece": 0.48296875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0703125, "calib/gap": 0.023867217773185234, "calib/mean_conf": 0.205546875, "calib/mu_c": 0.21468354430379752, "calib/mu_w": 0.19081632653061228, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0356640625, "calib/std_conf": 0.28871024778787185, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2499405646359584, "calib/step_q_c_n": 673.0, "calib/step_q_gap": 0.00264478998807105, "calib/step_q_w": 0.24729577464788735, "calib/step_q_w_n": 426.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 457.23828125, "completions/mean_terminated_length": 459.0314025878906, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.18346666666666667, "grad_norm": 0.00690126558765769, "learning_rate": 7.777777777777779e-07, "loss": 0.057, "num_tokens": 38774752.0, "reward": 1.0621392726898193, "reward_std": 0.1700059324502945, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.5149035453796387, "rewards/format_reward_step": 0.9921875, "step": 172 }, { "aux_distill/final_loss": 0.0001626137621997259, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06854678073432297, "aux_distill/mean_u": 0.18265357143386787, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 163.25, "aux_distill/step_loss": 0.6853051725775003, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5511278664007976, "calib/avg_num_step_conf": 5.125, "calib/ece": 0.3881889763779527, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": 0.05203888334995019, "calib/mean_conf": 0.19023622047244093, "calib/mu_c": 0.21441176470588239, "calib/mu_w": 0.1623728813559322, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.02149606299212599, "calib/std_conf": 0.25156506826260105, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.295595567867036, "calib/step_q_c_n": 722.0, "calib/step_q_gap": 0.05186675430771395, "calib/step_q_w": 0.24372881355932205, "calib/step_q_w_n": 590.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1568.0, "completions/max_terminated_length": 1568.0, "completions/mean_length": 511.26171875, "completions/mean_terminated_length": 513.2667236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.18453333333333333, "grad_norm": 0.006458665709942579, "learning_rate": 7.5e-07, "loss": 0.0733, "num_tokens": 39008795.0, "reward": 1.0473742485046387, "reward_std": 0.16939207911491394, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5830296277999878, "rewards/format_reward_step": 0.98046875, "step": 173 }, { "aux_distill/final_loss": 0.0005857845847003773, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.072768610320054, "aux_distill/mean_u": 0.20042516857950027, "aux_distill/n_active_final_tok": 30.875, "aux_distill/n_active_tok": 157.25, "aux_distill/step_loss": 0.7271003052592278, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.4796011768551814, "calib/avg_num_step_conf": 4.953125, "calib/ece": 0.4326032258064516, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.08870967741935484, "calib/gap": -0.0583984831644328, "calib/mean_conf": 0.22457419354838706, "calib/mu_c": 0.19325565217391305, "calib/mu_w": 0.25165413533834585, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.09673387096774194, "calib/std_conf": 0.312205231267513, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.2402541889483066, "calib/step_q_c_n": 561.0, "calib/step_q_gap": -0.05041082521599935, "calib/step_q_w": 0.29066501416430596, "calib/step_q_w_n": 706.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2481.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 533.84375, "completions/mean_terminated_length": 544.4780883789062, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.1856, "grad_norm": 0.005857736337929964, "learning_rate": 7.222222222222222e-07, "loss": 0.0468, "num_tokens": 39249691.0, "reward": 0.9747790098190308, "reward_std": 0.22227507829666138, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5433081388473511, "rewards/format_reward_step": 0.95703125, "step": 174 }, { "aux_distill/final_loss": 0.00022364286769516184, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07627740735188127, "aux_distill/mean_u": 0.21568897692772337, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 146.125, "aux_distill/step_loss": 0.762550413608551, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5145747934153165, "calib/avg_num_step_conf": 4.57421875, "calib/ece": 0.331, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.076, "calib/gap": 0.03670700761272691, "calib/mean_conf": 0.2026, "calib/mu_c": 0.223302752293578, "calib/mu_w": 0.18659574468085108, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.04879999999999999, "calib/std_conf": 0.2865994417300913, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.29833333333333334, "calib/step_q_c_n": 498.0, "calib/step_q_gap": 0.06271669143140171, "calib/step_q_w": 0.23561664190193163, "calib/step_q_w_n": 673.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2508.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 492.49609375, "completions/mean_terminated_length": 500.3135070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.18666666666666668, "grad_norm": 0.007421276066452265, "learning_rate": 6.944444444444446e-07, "loss": 0.0739, "num_tokens": 39481594.0, "reward": 1.0114911794662476, "reward_std": 0.19522380828857422, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6206386685371399, "rewards/format_reward_step": 0.9765625, "step": 175 }, { "aux_distill/final_loss": 0.0001829939112667489, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07527686783578247, "aux_distill/mean_u": 0.23696593019136702, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 150.0, "aux_distill/step_loss": 0.7525856709107757, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.529479302426612, "calib/avg_num_step_conf": 4.703125, "calib/ece": 0.42625984251968496, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.08661417322834646, "calib/gap": 0.01572705269037422, "calib/mean_conf": 0.1993307086614173, "calib/mu_c": 0.20694656488549618, "calib/mu_w": 0.19121951219512195, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.054921259842519686, "calib/std_conf": 0.2925787209532093, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2625811965811966, "calib/step_q_c_n": 585.0, "calib/step_q_gap": 0.016991535838062538, "calib/step_q_w": 0.24558966074313407, "calib/step_q_w_n": 619.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1906.0, "completions/max_terminated_length": 1906.0, "completions/mean_length": 498.39453125, "completions/mean_terminated_length": 500.34906005859375, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.18773333333333334, "grad_norm": 0.006700032390654087, "learning_rate": 6.666666666666667e-07, "loss": 0.0805, "num_tokens": 39713247.0, "reward": 1.0359079837799072, "reward_std": 0.16493433713912964, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5679097175598145, "rewards/format_reward_step": 0.9921875, "step": 176 }, { "aux_distill/final_loss": 0.015147438318081186, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07469213579315692, "aux_distill/mean_u": 0.1922451938702458, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 146.375, "aux_distill/step_loss": 0.7317739073187113, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4699219733199094, "calib/avg_num_step_conf": 4.57421875, "calib/ece": 0.4300395256916996, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05928853754940711, "calib/gap": -0.0006972061414548258, "calib/mean_conf": 0.18893280632411066, "calib/mu_c": 0.18861313868613142, "calib/mu_w": 0.18931034482758624, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.03873517786561265, "calib/std_conf": 0.27523049526630516, "calib/step_conf_rate": 0.984375, "calib/step_q_c": 0.2358448275862069, "calib/step_q_c_n": 580.0, "calib/step_q_gap": 0.017384590699574043, "calib/step_q_w": 0.21846023688663285, "calib/step_q_w_n": 591.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2900.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 503.703125, "completions/mean_terminated_length": 507.6692810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.1888, "grad_norm": 0.006349343340843916, "learning_rate": 6.388888888888889e-07, "loss": 0.1273, "num_tokens": 39946027.0, "reward": 1.026729702949524, "reward_std": 0.17228364944458008, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.5378344058990479, "rewards/format_reward_step": 0.98046875, "step": 177 }, { "aux_distill/final_loss": 0.00010571018469818227, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07261781021952629, "aux_distill/mean_u": 0.19784925852864274, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 159.5, "aux_distill/step_loss": 0.7260723812505603, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5133538461538462, "calib/avg_num_step_conf": 4.9921875, "calib/ece": 0.39415686274509804, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": 0.007978461538461518, "calib/mean_conf": 0.20662745098039217, "calib/mu_c": 0.2105384615384615, "calib/mu_w": 0.20256, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.045490196078431376, "calib/std_conf": 0.2869631363074095, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.27271159874608153, "calib/step_q_c_n": 638.0, "calib/step_q_gap": 0.010164723746081517, "calib/step_q_w": 0.262546875, "calib/step_q_w_n": 640.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 955.0, "completions/max_terminated_length": 955.0, "completions/mean_length": 461.99609375, "completions/mean_terminated_length": 463.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 121.0, "epoch": 0.18986666666666666, "grad_norm": 0.006635536905378103, "learning_rate": 6.111111111111112e-07, "loss": 0.0964, "num_tokens": 40170370.0, "reward": 1.0407307147979736, "reward_std": 0.16870535910129547, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5775550603866577, "rewards/format_reward_step": 0.99609375, "step": 178 }, { "aux_distill/final_loss": 9.58011798957159e-05, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07280069135595113, "aux_distill/mean_u": 0.20255241438903543, "aux_distill/n_active_final_tok": 30.75, "aux_distill/n_active_tok": 150.5, "aux_distill/step_loss": 0.7279111016541719, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5251181049970058, "calib/avg_num_step_conf": 4.8046875, "calib/ece": 0.4478861788617886, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.044715447154471545, "calib/gap": 0.004235810765852666, "calib/mean_conf": 0.15512195121951222, "calib/mu_c": 0.15706766917293233, "calib/mu_w": 0.15283185840707966, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.031178861788617875, "calib/std_conf": 0.2406997449972085, "calib/step_conf_rate": 0.98046875, "calib/step_q_c": 0.22209523809523812, "calib/step_q_c_n": 630.0, "calib/step_q_gap": -0.05324309523809523, "calib/step_q_w": 0.27533833333333335, "calib/step_q_w_n": 600.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 475.2734375, "completions/mean_terminated_length": 480.90911865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.19093333333333334, "grad_norm": 0.007111471612006426, "learning_rate": 5.833333333333334e-07, "loss": 0.0896, "num_tokens": 40398304.0, "reward": 1.003140926361084, "reward_std": 0.1745980978012085, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5258132815361023, "rewards/format_reward_step": 0.9609375, "step": 179 }, { "aux_distill/final_loss": 9.564100832903932e-05, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07134161016438156, "aux_distill/mean_u": 0.2064909479776691, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 155.75, "aux_distill/step_loss": 0.7133204592391849, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4595897897511734, "calib/avg_num_step_conf": 4.8671875, "calib/ece": 0.4600393700787401, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.08267716535433071, "calib/gap": -0.022557062946055445, "calib/mean_conf": 0.22484251968503935, "calib/mu_c": 0.21569536423841054, "calib/mu_w": 0.238252427184466, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04519685039370079, "calib/std_conf": 0.29550932743574304, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.30337893296853624, "calib/step_q_c_n": 731.0, "calib/step_q_gap": 0.0012041756869828335, "calib/step_q_w": 0.3021747572815534, "calib/step_q_w_n": 515.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2400.0, "completions/max_terminated_length": 2400.0, "completions/mean_length": 525.796875, "completions/mean_terminated_length": 525.796875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.192, "grad_norm": 0.006270106416195631, "learning_rate": 5.555555555555555e-07, "loss": 0.0957, "num_tokens": 40636764.0, "reward": 1.0510127544403076, "reward_std": 0.1763603389263153, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.5199941396713257, "rewards/format_reward_step": 0.9921875, "step": 180 }, { "aux_distill/final_loss": 0.0001657123884797329, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07269645959604532, "aux_distill/mean_u": 0.16722964074674762, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 143.375, "aux_distill/step_loss": 0.7267988743260503, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5390842398178598, "calib/avg_num_step_conf": 4.48046875, "calib/ece": 0.3808730158730158, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": 0.019853275992916808, "calib/mean_conf": 0.16571428571428573, "calib/mu_c": 0.176271186440678, "calib/mu_w": 0.15641791044776118, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03916666666666667, "calib/std_conf": 0.24985211045237474, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2612891344383057, "calib/step_q_c_n": 543.0, "calib/step_q_gap": 0.06155403510055735, "calib/step_q_w": 0.19973509933774836, "calib/step_q_w_n": 604.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2047.0, "completions/max_terminated_length": 2047.0, "completions/mean_length": 449.953125, "completions/mean_terminated_length": 453.4960632324219, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.19306666666666666, "grad_norm": 0.007513204589486122, "learning_rate": 5.277777777777779e-07, "loss": 0.0637, "num_tokens": 40858216.0, "reward": 1.02138352394104, "reward_std": 0.15062540769577026, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5974546670913696, "rewards/format_reward_step": 0.984375, "step": 181 }, { "aux_distill/final_loss": 0.00011876001019572868, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0664075068780221, "aux_distill/mean_u": 0.18295012680177067, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 156.625, "aux_distill/step_loss": 0.6639563012868166, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5261798469387755, "calib/avg_num_step_conf": 4.89453125, "calib/ece": 0.41361111111111115, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.09126984126984126, "calib/gap": 0.03301785714285713, "calib/mean_conf": 0.23718253968253966, "calib/mu_c": 0.25185714285714284, "calib/mu_w": 0.2188392857142857, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0476190476190476, "calib/std_conf": 0.30725163930216465, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2820192307692308, "calib/step_q_c_n": 728.0, "calib/step_q_gap": 0.02697161172161172, "calib/step_q_w": 0.2550476190476191, "calib/step_q_w_n": 525.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2480.0, "completions/max_terminated_length": 2480.0, "completions/mean_length": 516.96875, "completions/mean_terminated_length": 516.96875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.19413333333333332, "grad_norm": 0.006183520890772343, "learning_rate": 5.000000000000001e-07, "loss": 0.1237, "num_tokens": 41096720.0, "reward": 1.049910068511963, "reward_std": 0.1884315013885498, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5646636486053467, "rewards/format_reward_step": 0.984375, "step": 182 }, { "aux_distill/final_loss": 0.00017398772183696565, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07154302636627108, "aux_distill/mean_u": 0.17953013342888505, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 138.25, "aux_distill/step_loss": 0.7152562653645873, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5100299962504686, "calib/avg_num_step_conf": 4.34375, "calib/ece": 0.4064229249011858, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05533596837944664, "calib/gap": -0.011224221972253479, "calib/mean_conf": 0.18515810276679842, "calib/mu_c": 0.17952380952380953, "calib/mu_w": 0.190748031496063, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.046778656126482215, "calib/std_conf": 0.2659719566474682, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.23727272727272727, "calib/step_q_c_n": 550.0, "calib/step_q_gap": -0.0048482691685538815, "calib/step_q_w": 0.24212099644128116, "calib/step_q_w_n": 562.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2037.0, "completions/max_terminated_length": 2037.0, "completions/mean_length": 502.71875, "completions/mean_terminated_length": 504.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.1952, "grad_norm": 0.006408975459635258, "learning_rate": 4.7222222222222226e-07, "loss": 0.0843, "num_tokens": 41332096.0, "reward": 1.0169315338134766, "reward_std": 0.15201400220394135, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5612065196037292, "rewards/format_reward_step": 0.98046875, "step": 183 }, { "aux_distill/final_loss": 0.00013644496129927575, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.0730898812180385, "aux_distill/mean_u": 0.20595037076727493, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 149.75, "aux_distill/step_loss": 0.7307623568922281, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5064565770105715, "calib/avg_num_step_conf": 4.6796875, "calib/ece": 0.47584980237154145, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.08695652173913043, "calib/gap": 0.03185133145992239, "calib/mean_conf": 0.2167193675889328, "calib/mu_c": 0.22855345911949684, "calib/mu_w": 0.19670212765957445, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03205533596837946, "calib/std_conf": 0.29063768702452825, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.29242465753424657, "calib/step_q_c_n": 730.0, "calib/step_q_gap": 0.05242465753424652, "calib/step_q_w": 0.24000000000000005, "calib/step_q_w_n": 468.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2746.0, "completions/max_terminated_length": 2746.0, "completions/mean_length": 499.58984375, "completions/mean_terminated_length": 499.58984375, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.19626666666666667, "grad_norm": 0.006794987712055445, "learning_rate": 4.444444444444445e-07, "loss": 0.1262, "num_tokens": 41565271.0, "reward": 1.0633325576782227, "reward_std": 0.1867324709892273, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.5211964845657349, "rewards/format_reward_step": 0.984375, "step": 184 }, { "aux_distill/final_loss": 0.00027000475074601127, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07329281861893833, "aux_distill/mean_u": 0.20704989977177685, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 156.125, "aux_distill/step_loss": 0.7326581748202443, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5283043032786885, "calib/avg_num_step_conf": 5.00390625, "calib/ece": 0.3753924, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.084, "calib/gap": 0.038675512295081926, "calib/mean_conf": 0.2294076, "calib/mu_c": 0.24828124999999995, "calib/mu_w": 0.20960573770491803, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0464, "calib/std_conf": 0.30866020070984207, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.28440901213171577, "calib/step_q_c_n": 577.0, "calib/step_q_gap": 0.04926142690444307, "calib/step_q_w": 0.2351475852272727, "calib/step_q_w_n": 704.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 492.12890625, "completions/mean_terminated_length": 501.9322814941406, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.19733333333333333, "grad_norm": 0.006193746812641621, "learning_rate": 4.1666666666666667e-07, "loss": 0.0648, "num_tokens": 41798176.0, "reward": 1.030439853668213, "reward_std": 0.19291558861732483, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5804111957550049, "rewards/format_reward_step": 0.9765625, "step": 185 }, { "aux_distill/final_loss": 0.00014655543202479748, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07584030227735639, "aux_distill/mean_u": 0.20989452434011863, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 160.875, "aux_distill/step_loss": 0.7582564577460289, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5442945795339412, "calib/avg_num_step_conf": 5.02734375, "calib/ece": 0.42108300395256903, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.11462450592885376, "calib/gap": 0.036618161094224916, "calib/mean_conf": 0.2497470355731225, "calib/mu_c": 0.26595744680851063, "calib/mu_w": 0.22933928571428572, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05675889328063241, "calib/std_conf": 0.3251910949917958, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.33207868383404865, "calib/step_q_c_n": 699.0, "calib/step_q_gap": 0.07436950016057925, "calib/step_q_w": 0.2577091836734694, "calib/step_q_w_n": 588.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1359.0, "completions/max_terminated_length": 1359.0, "completions/mean_length": 476.3984375, "completions/mean_terminated_length": 480.14959716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.1984, "grad_norm": 0.006201084237545729, "learning_rate": 3.8888888888888895e-07, "loss": 0.0511, "num_tokens": 42025174.0, "reward": 1.0477831363677979, "reward_std": 0.2021675556898117, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5604099035263062, "rewards/format_reward_step": 0.984375, "step": 186 }, { "aux_distill/final_loss": 0.004851514590882289, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06904444005340338, "aux_distill/mean_u": 0.19692998760247768, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 197.125, "aux_distill/step_loss": 0.6855928674340248, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.45261437908496727, "calib/avg_num_step_conf": 6.296875, "calib/ece": 0.4537549407114624, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.06324110671936758, "calib/gap": -0.04977438411261942, "calib/mean_conf": 0.19794466403162056, "calib/mu_c": 0.1749264705882353, "calib/mu_w": 0.2247008547008547, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.057075098814229244, "calib/std_conf": 0.2702533668863807, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.23106534090909092, "calib/step_q_c_n": 704.0, "calib/step_q_gap": 0.0018582924509411292, "calib/step_q_w": 0.2292070484581498, "calib/step_q_w_n": 908.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2587.0, "completions/max_terminated_length": 2587.0, "completions/mean_length": 542.12109375, "completions/mean_terminated_length": 548.5494384765625, "completions/min_length": 0.0, "completions/min_terminated_length": 187.0, "epoch": 0.19946666666666665, "grad_norm": 0.005682837218046188, "learning_rate": 3.611111111111111e-07, "loss": 0.04, "num_tokens": 42265501.0, "reward": 1.025758981704712, "reward_std": 0.1700371503829956, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5319867134094238, "rewards/format_reward_step": 0.98828125, "step": 187 }, { "aux_distill/final_loss": 0.00015512108848270145, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06917913688812405, "aux_distill/mean_u": 0.21871284821246945, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 149.625, "aux_distill/step_loss": 0.6916362354531884, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.47536195072390147, "calib/avg_num_step_conf": 5.2265625, "calib/ece": 0.36473505976095627, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.09163346613545817, "calib/gap": 0.011983140716281454, "calib/mean_conf": 0.23072310756972114, "calib/mu_c": 0.23678629032258064, "calib/mu_w": 0.22480314960629919, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05071713147410359, "calib/std_conf": 0.3014775764588048, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.2872699300699301, "calib/step_q_c_n": 572.0, "calib/step_q_gap": 0.051799956179590684, "calib/step_q_w": 0.23546997389033944, "calib/step_q_w_n": 766.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2798.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 512.953125, "completions/mean_terminated_length": 521.0952758789062, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.20053333333333334, "grad_norm": 0.00596034899353981, "learning_rate": 3.3333333333333335e-07, "loss": 0.074, "num_tokens": 42500889.0, "reward": 1.0225554704666138, "reward_std": 0.19933326542377472, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5841735005378723, "rewards/format_reward_step": 0.9765625, "step": 188 }, { "aux_distill/final_loss": 0.0001890165859776971, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07034963823389262, "aux_distill/mean_u": 0.22055540871474788, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 150.0, "aux_distill/step_loss": 0.7033073594793677, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5282632010598701, "calib/avg_num_step_conf": 4.6875, "calib/ece": 0.3914682539682539, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06746031746031746, "calib/gap": 0.02654659012049712, "calib/mean_conf": 0.21503968253968253, "calib/mu_c": 0.2277862595419847, "calib/mu_w": 0.20123966942148758, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04333333333333332, "calib/std_conf": 0.2851767733811604, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.27550335570469797, "calib/step_q_c_n": 596.0, "calib/step_q_gap": 0.048821236499399956, "calib/step_q_w": 0.22668211920529802, "calib/step_q_w_n": 604.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2934.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 498.8203125, "completions/mean_terminated_length": 500.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.2016, "grad_norm": 0.007148802746087313, "learning_rate": 3.055555555555556e-07, "loss": 0.0749, "num_tokens": 42736355.0, "reward": 1.0361970663070679, "reward_std": 0.17948536574840546, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5802066326141357, "rewards/format_reward_step": 0.98046875, "step": 189 }, { "aux_distill/final_loss": 0.0004349350431880339, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06765694764908403, "aux_distill/mean_u": 0.20189786745076008, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 149.125, "aux_distill/step_loss": 0.676134523935616, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5152868217054263, "calib/avg_num_step_conf": 4.66015625, "calib/ece": 0.3947244094488189, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.06299212598425197, "calib/gap": 0.03580279069767445, "calib/mean_conf": 0.19354330708661416, "calib/mu_c": 0.21116279069767444, "calib/mu_w": 0.17536, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.040196850393700786, "calib/std_conf": 0.27659352922286673, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2808823529411765, "calib/step_q_c_n": 578.0, "calib/step_q_gap": 0.041440076518412305, "calib/step_q_w": 0.23944227642276422, "calib/step_q_w_n": 615.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1766.0, "completions/max_terminated_length": 1766.0, "completions/mean_length": 532.890625, "completions/mean_terminated_length": 537.0866088867188, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.20266666666666666, "grad_norm": 0.006504186429083347, "learning_rate": 2.7777777777777776e-07, "loss": 0.0497, "num_tokens": 42978383.0, "reward": 1.0420573949813843, "reward_std": 0.16218328475952148, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5880210995674133, "rewards/format_reward_step": 0.9921875, "step": 190 }, { "aux_distill/final_loss": 0.00032513609193074444, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07050644035916775, "aux_distill/mean_u": 0.16254164632472845, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 172.75, "aux_distill/step_loss": 0.7047392604872584, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5509948692278813, "calib/avg_num_step_conf": 5.56640625, "calib/ece": 0.3572446640316206, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.07905138339920949, "calib/gap": 0.05162393943186086, "calib/mean_conf": 0.23769604743083003, "calib/mu_c": 0.2644262295081967, "calib/mu_w": 0.21280229007633586, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.056363636363636366, "calib/std_conf": 0.3083635465198859, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.2993606557377049, "calib/step_q_c_n": 610.0, "calib/step_q_gap": 0.05421611586040426, "calib/step_q_w": 0.24514453987730062, "calib/step_q_w_n": 815.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1881.0, "completions/max_terminated_length": 1881.0, "completions/mean_length": 477.61328125, "completions/mean_terminated_length": 483.2767028808594, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.20373333333333332, "grad_norm": 0.0066607119515538216, "learning_rate": 2.5000000000000004e-07, "loss": 0.0038, "num_tokens": 43204820.0, "reward": 1.039391279220581, "reward_std": 0.18578588962554932, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6139389276504517, "rewards/format_reward_step": 0.98828125, "step": 191 }, { "aux_distill/final_loss": 0.0001435406411474105, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07215216173790395, "aux_distill/mean_u": 0.20260344511299375, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 152.5, "aux_distill/step_loss": 0.7213780581951141, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5733936250948647, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.370952380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0873015873015873, "calib/gap": 0.07278522641032123, "calib/mean_conf": 0.22412698412698415, "calib/mu_c": 0.25820895522388054, "calib/mu_w": 0.1854237288135593, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03166666666666667, "calib/std_conf": 0.2952649330693171, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.292048, "calib/step_q_c_n": 625.0, "calib/step_q_gap": 0.05181270588235293, "calib/step_q_w": 0.24023529411764705, "calib/step_q_w_n": 595.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2600.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 538.3828125, "completions/mean_terminated_length": 538.3828125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 0.2048, "grad_norm": 0.005982318893074989, "learning_rate": 2.2222222222222224e-07, "loss": 0.1014, "num_tokens": 43447622.0, "reward": 1.048103928565979, "reward_std": 0.18802429735660553, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5923015475273132, "rewards/format_reward_step": 0.98046875, "step": 192 }, { "aux_distill/final_loss": 0.0001167211955817038, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07536566106136888, "aux_distill/mean_u": 0.1909164239972604, "aux_distill/n_active_final_tok": 31.25, "aux_distill/n_active_tok": 154.0, "aux_distill/step_loss": 0.7535398826003075, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.511112538540596, "calib/avg_num_step_conf": 4.8125, "calib/ece": 0.4472509960159363, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.08366533864541832, "calib/gap": -0.002033016443987651, "calib/mean_conf": 0.2137848605577689, "calib/mu_c": 0.2128776978417266, "calib/mu_w": 0.21491071428571426, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.053625498007968134, "calib/std_conf": 0.3043947975226336, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.26211312803889786, "calib/step_q_c_n": 617.0, "calib/step_q_gap": -0.01899256301801272, "calib/step_q_w": 0.2811056910569106, "calib/step_q_w_n": 615.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1865.0, "completions/max_terminated_length": 1865.0, "completions/mean_length": 512.19921875, "completions/mean_terminated_length": 516.2322998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.20586666666666667, "grad_norm": 0.006591039709746838, "learning_rate": 1.9444444444444447e-07, "loss": 0.0923, "num_tokens": 43684457.0, "reward": 1.0221588611602783, "reward_std": 0.19100113213062286, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5286929607391357, "rewards/format_reward_step": 0.97265625, "step": 193 }, { "aux_distill/final_loss": 0.00011849602537949977, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06704711669590324, "aux_distill/mean_u": 0.18057837039754573, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 140.625, "aux_distill/step_loss": 0.6703526610508561, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4985137869972173, "calib/avg_num_step_conf": 4.39453125, "calib/ece": 0.4415079365079365, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0873015873015873, "calib/gap": -0.014037439919048822, "calib/mean_conf": 0.22634920634920636, "calib/mu_c": 0.21977611940298508, "calib/mu_w": 0.2338135593220339, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.06805555555555554, "calib/std_conf": 0.30648960513356027, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.274263698630137, "calib/step_q_c_n": 584.0, "calib/step_q_gap": 0.011620445395386536, "calib/step_q_w": 0.26264325323475046, "calib/step_q_w_n": 541.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2536.0, "completions/max_terminated_length": 2536.0, "completions/mean_length": 475.30078125, "completions/mean_terminated_length": 477.16473388671875, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.20693333333333333, "grad_norm": 0.006791603751480579, "learning_rate": 1.6666666666666668e-07, "loss": 0.1045, "num_tokens": 43912078.0, "reward": 1.0240570306777954, "reward_std": 0.20932304859161377, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.5442078113555908, "rewards/format_reward_step": 0.98046875, "step": 194 }, { "aux_distill/final_loss": 0.00012114963419662672, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07237316464306787, "aux_distill/mean_u": 0.228926683168374, "aux_distill/n_active_final_tok": 31.5, "aux_distill/n_active_tok": 136.75, "aux_distill/step_loss": 0.723610489629209, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5112593828190158, "calib/avg_num_step_conf": 4.30078125, "calib/ece": 0.42269841269841274, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.10317460317460317, "calib/gap": -0.004797587733367525, "calib/mean_conf": 0.2603968253968254, "calib/mu_c": 0.25832167832167835, "calib/mu_w": 0.2631192660550459, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.05781746031746032, "calib/std_conf": 0.3235576070335652, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3183720930232558, "calib/step_q_c_n": 602.0, "calib/step_q_gap": -0.001267185533858406, "calib/step_q_w": 0.3196392785571142, "calib/step_q_w_n": 499.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2981.0, "completions/max_terminated_length": 2981.0, "completions/mean_length": 460.64453125, "completions/mean_terminated_length": 464.2716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.208, "grad_norm": 0.006784157827496529, "learning_rate": 1.3888888888888888e-07, "loss": 0.0426, "num_tokens": 44135987.0, "reward": 1.043771505355835, "reward_std": 0.18392643332481384, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5445742011070251, "rewards/format_reward_step": 0.984375, "step": 195 }, { "aux_distill/final_loss": 0.0080426042968611, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06883227336220443, "aux_distill/mean_u": 0.1723530723501916, "aux_distill/n_active_final_tok": 31.75, "aux_distill/n_active_tok": 155.25, "aux_distill/step_loss": 0.680280108936131, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5325003116041381, "calib/avg_num_step_conf": 4.86328125, "calib/ece": 0.3845882352941177, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.07450980392156863, "calib/gap": 0.0532500311604138, "calib/mean_conf": 0.24098039215686273, "calib/mu_c": 0.2645774647887324, "calib/mu_w": 0.2113274336283186, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03435294117647059, "calib/std_conf": 0.3023441416791292, "calib/step_conf_rate": 0.99609375, "calib/step_q_c": 0.3001815431164902, "calib/step_q_c_n": 661.0, "calib/step_q_gap": 0.02321236503429841, "calib/step_q_w": 0.2769691780821918, "calib/step_q_w_n": 584.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 964.0, "completions/max_terminated_length": 964.0, "completions/mean_length": 427.60546875, "completions/mean_terminated_length": 429.2823791503906, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.20906666666666668, "grad_norm": 0.00739358039572835, "learning_rate": 1.1111111111111112e-07, "loss": 0.0675, "num_tokens": 44347998.0, "reward": 1.064500331878662, "reward_std": 0.18100447952747345, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.5821257829666138, "rewards/format_reward_step": 0.9921875, "step": 196 }, { "aux_distill/final_loss": 0.00013514872648556775, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07078921876382083, "aux_distill/mean_u": 0.22375121075837906, "aux_distill/n_active_final_tok": 31.625, "aux_distill/n_active_tok": 147.625, "aux_distill/step_loss": 0.707757031545043, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4975877192982456, "calib/avg_num_step_conf": 4.61328125, "calib/ece": 0.3769565217391305, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.06719367588932806, "calib/gap": -0.008139724310776947, "calib/mean_conf": 0.19869565217391305, "calib/mu_c": 0.19441666666666665, "calib/mu_w": 0.2025563909774436, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.050671936758893296, "calib/std_conf": 0.2776658039383831, "calib/step_conf_rate": 1.0, "calib/step_q_c": 0.23487940630797774, "calib/step_q_c_n": 539.0, "calib/step_q_gap": -0.03834489275744288, "calib/step_q_w": 0.2732242990654206, "calib/step_q_w_n": 642.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2541.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 506.9375, "completions/mean_terminated_length": 506.9375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.21013333333333334, "grad_norm": 0.006808362435549498, "learning_rate": 8.333333333333334e-08, "loss": 0.1027, "num_tokens": 44582830.0, "reward": 1.0218079090118408, "reward_std": 0.16181005537509918, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5865848064422607, "rewards/format_reward_step": 0.98828125, "step": 197 }, { "aux_distill/final_loss": 0.00012877855374426872, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07026364665944129, "aux_distill/mean_u": 0.18823215760758855, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 161.125, "aux_distill/step_loss": 0.7025076858699322, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4268308323714249, "calib/avg_num_step_conf": 5.0546875, "calib/ece": 0.46108764940239044, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0796812749003984, "calib/gap": -0.06961645504681288, "calib/mean_conf": 0.21508764940239045, "calib/mu_c": 0.1837463768115942, "calib/mu_w": 0.2533628318584071, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.06318725099601592, "calib/std_conf": 0.29121036106102993, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2709907932011332, "calib/step_q_c_n": 706.0, "calib/step_q_gap": -0.0014921999961457488, "calib/step_q_w": 0.2724829931972789, "calib/step_q_w_n": 588.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 1387.0, "completions/max_terminated_length": 1387.0, "completions/mean_length": 459.11328125, "completions/mean_terminated_length": 468.25897216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.2112, "grad_norm": 0.006058075465261936, "learning_rate": 5.555555555555556e-08, "loss": 0.0379, "num_tokens": 44805747.0, "reward": 1.0152664184570312, "reward_std": 0.1935424953699112, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.5110015273094177, "rewards/format_reward_step": 0.98046875, "step": 198 }, { "aux_distill/final_loss": 0.00014436346458523985, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.07156735879834741, "aux_distill/mean_u": 0.20905314065619962, "aux_distill/n_active_final_tok": 31.375, "aux_distill/n_active_tok": 157.875, "aux_distill/step_loss": 0.7155292062088847, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4812260536398467, "calib/avg_num_step_conf": 5.01171875, "calib/ece": 0.44147410358565725, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.08764940239043825, "calib/gap": -0.041166028097062546, "calib/mean_conf": 0.24561752988047805, "calib/mu_c": 0.22659259259259257, "calib/mu_w": 0.2677586206896551, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0746215139442231, "calib/std_conf": 0.3130697469664095, "calib/step_conf_rate": 0.9921875, "calib/step_q_c": 0.2637047756874096, "calib/step_q_c_n": 691.0, "calib/step_q_gap": -0.03202157566394176, "calib/step_q_w": 0.29572635135135134, "calib/step_q_w_n": 592.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1851.0, "completions/max_terminated_length": 1851.0, "completions/mean_length": 522.2890625, "completions/mean_terminated_length": 530.5794067382812, "completions/min_length": 0.0, "completions/min_terminated_length": 177.0, "epoch": 0.21226666666666666, "grad_norm": 0.005946105346083641, "learning_rate": 2.777777777777778e-08, "loss": 0.0273, "num_tokens": 45043653.0, "reward": 1.022146463394165, "reward_std": 0.1993604302406311, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5364804267883301, "rewards/format_reward_step": 0.9765625, "step": 199 }, { "aux_distill/final_loss": 0.00012137726037053653, "aux_distill/lambda": 0.10000000000000005, "aux_distill/lambda_final": 0.10000000000000005, "aux_distill/loss": 0.06986102659720927, "aux_distill/mean_u": 0.19741112501193644, "aux_distill/n_active_final_tok": 31.125, "aux_distill/n_active_tok": 146.625, "aux_distill/step_loss": 0.6984888771548867, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5459529505582137, "calib/avg_num_step_conf": 4.68359375, "calib/ece": 0.4782868525896413, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.09561752988047809, "calib/gap": 0.0029173312068048463, "calib/mean_conf": 0.22792828685258967, "calib/mu_c": 0.22907894736842105, "calib/mu_w": 0.2261616161616162, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0503187250996016, "calib/std_conf": 0.313415338074085, "calib/step_conf_rate": 0.98828125, "calib/step_q_c": 0.27278184523809523, "calib/step_q_c_n": 672.0, "calib/step_q_gap": 0.0038021488434083484, "calib/step_q_w": 0.2689796963946869, "calib/step_q_w_n": 527.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 506.9921875, "completions/mean_terminated_length": 513.0039672851562, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.21333333333333335, "grad_norm": 0.006176741328090429, "learning_rate": 0.0, "loss": 0.0705, "num_tokens": 45281491.0, "reward": 1.036815881729126, "reward_std": 0.21423842012882233, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.5072253942489624, "rewards/format_reward_step": 0.97265625, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.11054307345300912, "train_runtime": 17801.0374, "train_samples_per_second": 2.876, "train_steps_per_second": 0.011 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 45281491, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }