{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.9022029798764449, "aux_distill/mean_u": 0.3108363672915008, "aux_distill/n_active_tok": 54.0, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.012639827094972134, "learning_rate": 2.5000000000000004e-07, "loss": 0.0681, "num_tokens": 264685.0, "reward": 0.037574999034404755, "reward_std": 0.07449960708618164, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.5604067397745032, "aux_distill/mean_u": 0.29356464889172007, "aux_distill/n_active_tok": 58.36842105263158, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.008656824938952923, "learning_rate": 5.000000000000001e-07, "loss": 0.0987, "num_tokens": 533467.0, "reward": 0.07537207007408142, "reward_std": 0.14035090804100037, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.7020920283264585, "aux_distill/mean_u": 0.2341661230714126, "aux_distill/n_active_tok": 45.333333333333336, "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.3384615384615385, "calib/avg_num_step_conf": 0.3984375, "calib/ece": 0.6294444444444445, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.0038461538461534994, "calib/mean_conf": 0.9072222222222222, "calib/mu_c": 0.9099999999999999, "calib/mu_w": 0.9061538461538464, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.6294444444444445, "calib/std_conf": 0.12164637070695056, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 751.33203125, "completions/mean_terminated_length": 808.155517578125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.01135075930505991, "learning_rate": 7.5e-07, "loss": 0.064, "num_tokens": 831064.0, "reward": 0.05656171590089798, "reward_std": 0.13380812108516693, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.027185939252376556, "rewards/format_reward_step": 0.06640625, "step": 3 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6724928220113119, "aux_distill/mean_u": 0.2293809413630709, "aux_distill/n_active_tok": 56.888888888888886, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.3888888888888889, "calib/avg_num_step_conf": 0.25390625, "calib/ece": 0.7718181818181816, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/gap": 0.007777777777777661, "calib/mean_conf": 0.9536363636363636, "calib/mu_c": 0.96, "calib/mu_w": 0.9522222222222223, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.0625, "calib/nonempty_step_conf_rate": 0.046875, "calib/pce": 0.7718181818181816, "calib/std_conf": 0.040959564822068396, "calib/step_conf_rate": 0.046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 2996.0, "completions/max_terminated_length": 2996.0, "completions/mean_length": 606.234375, "completions/mean_terminated_length": 674.7651977539062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.0061745117418468, "learning_rate": 1.0000000000000002e-06, "loss": 0.0274, "num_tokens": 1092428.0, "reward": 0.026426563039422035, "reward_std": 0.07038375735282898, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.009884374216198921, "rewards/format_reward_step": 0.03515625, "step": 4 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.7171915471553802, "aux_distill/mean_u": 0.2454572816006418, "aux_distill/n_active_tok": 43.42857142857143, "calib/answer_extract_rate": 0.0546875, "calib/auroc": 0.4285714285714286, "calib/avg_num_step_conf": 0.1484375, "calib/ece": 0.651375, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.375, "calib/gap": 0.07271428571428562, "calib/mean_conf": 0.776375, "calib/mu_c": 0.84, "calib/mu_w": 0.7672857142857143, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.0390625, "calib/pce": 0.651375, "calib/std_conf": 0.29716491107632476, "calib/step_conf_rate": 0.0390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2979.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 718.921875, "completions/mean_terminated_length": 789.888427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.005417319014668465, "learning_rate": 1.25e-06, "loss": 0.0285, "num_tokens": 1383160.0, "reward": 0.021108552813529968, "reward_std": 0.05447760224342346, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.010967106558382511, "rewards/format_reward_step": 0.02734375, "step": 5 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.7201201096177101, "aux_distill/mean_u": 0.33045206176182806, "aux_distill/n_active_tok": 54.416666666666664, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.45833333333333337, "calib/avg_num_step_conf": 0.3046875, "calib/ece": 0.8054285714285715, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.9285714285714286, "calib/gap": 0.013666666666666494, "calib/mean_conf": 0.9482857142857143, "calib/mu_c": 0.96, "calib/mu_w": 0.9463333333333335, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.8054285714285715, "calib/std_conf": 0.06116421522826722, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 630.50390625, "completions/mean_terminated_length": 672.5375366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.006735073868185282, "learning_rate": 1.5e-06, "loss": 0.0839, "num_tokens": 1650521.0, "reward": 0.0346597358584404, "reward_std": 0.0735585018992424, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.010725718922913074, "rewards/format_reward_step": 0.05078125, "step": 6 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.6654665925923515, "aux_distill/mean_u": 0.3108273132858048, "aux_distill/n_active_tok": 48.05882352941177, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.4181818181818182, "calib/avg_num_step_conf": 0.40234375, "calib/ece": 0.6393749999999999, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.9375, "calib/gap": 0.008909090909090867, "calib/mean_conf": 0.951875, "calib/mu_c": 0.958, "calib/mu_w": 0.9490909090909091, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.6393749999999999, "calib/std_conf": 0.042014692370645756, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 708.05078125, "completions/mean_terminated_length": 774.6196899414062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.009294859133660793, "learning_rate": 1.75e-06, "loss": 0.0773, "num_tokens": 1939206.0, "reward": 0.044801369309425354, "reward_std": 0.10697519779205322, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.01929023489356041, "rewards/format_reward_step": 0.05078125, "step": 7 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.7723357169067159, "aux_distill/mean_u": 0.3379113503404071, "aux_distill/n_active_tok": 44.11764705882353, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.0, "calib/avg_num_step_conf": 0.2890625, "calib/ece": 0.905, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": -0.39636363636363636, "calib/mean_conf": 0.9133333333333334, "calib/mu_c": 0.55, "calib/mu_w": 0.9463636363636364, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.8675, "calib/std_conf": 0.12638125740085915, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 3037.0, "completions/max_terminated_length": 3037.0, "completions/mean_length": 646.2109375, "completions/mean_terminated_length": 689.2916870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.008389068767428398, "learning_rate": 2.0000000000000003e-06, "loss": 0.058, "num_tokens": 2211148.0, "reward": 0.01809394732117653, "reward_std": 0.047178227454423904, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.004937890917062759, "rewards/format_reward_step": 0.02734375, "step": 8 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6544934511184692, "aux_distill/mean_u": 0.21143247752578329, "aux_distill/n_active_tok": 58.4, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.8333333333333334, "calib/avg_num_step_conf": 0.2890625, "calib/ece": 0.5545454545454547, "calib/final_conf_rate": 0.04296875, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.45454545454545453, "calib/gap": 0.28555555555555556, "calib/mean_conf": 0.7363636363636363, "calib/mu_c": 0.97, "calib/mu_w": 0.6844444444444444, "calib/nonempty_final_conf_rate": 0.04296875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.5545454545454547, "calib/std_conf": 0.32902812057973313, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.12890625, "completions/max_length": 3028.0, "completions/max_terminated_length": 3028.0, "completions/mean_length": 568.79296875, "completions/mean_terminated_length": 652.9641723632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.007296205498278141, "learning_rate": 2.25e-06, "loss": 0.0066, "num_tokens": 2464295.0, "reward": 0.030635546892881393, "reward_std": 0.06952265650033951, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.018302343785762787, "rewards/format_reward_step": 0.03515625, "step": 9 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.657138936659869, "aux_distill/mean_u": 0.2260673486435751, "aux_distill/n_active_tok": 42.35294117647059, "calib/answer_extract_rate": 0.10546875, "calib/auroc": 0.9444444444444444, "calib/avg_num_step_conf": 0.36328125, "calib/ece": 0.7887199999999999, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.12364444444444445, "calib/mean_conf": 0.88872, "calib/mu_c": 1.0, "calib/mu_w": 0.8763555555555556, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.0859375, "calib/pce": 0.7887199999999999, "calib/std_conf": 0.21795175980019063, "calib/step_conf_rate": 0.0859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 2933.0, "completions/max_terminated_length": 2933.0, "completions/mean_length": 597.015625, "completions/mean_terminated_length": 682.3035888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.009703119285404682, "learning_rate": 2.5e-06, "loss": 0.051, "num_tokens": 2723931.0, "reward": 0.04364839196205139, "reward_std": 0.09204517304897308, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.016984282061457634, "rewards/format_reward_step": 0.0625, "step": 10 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.6334928455560104, "aux_distill/mean_u": 0.2929954140568702, "aux_distill/n_active_tok": 67.95652173913044, "calib/answer_extract_rate": 0.15234375, "calib/auroc": 0.44545454545454544, "calib/avg_num_step_conf": 0.7890625, "calib/ece": 0.7625925925925927, "calib/final_conf_rate": 0.10546875, "calib/format_rate": 0.078125, "calib/frac_conf_gt_0.9": 0.9259259259259259, "calib/gap": 0.0027272727272726893, "calib/mean_conf": 0.9477777777777777, "calib/mu_c": 0.95, "calib/mu_w": 0.9472727272727273, "calib/nonempty_final_conf_rate": 0.10546875, "calib/nonempty_reasoning_rate": 0.2109375, "calib/nonempty_step_conf_rate": 0.15625, "calib/pce": 0.7625925925925927, "calib/std_conf": 0.07435317788587381, "calib/step_conf_rate": 0.15625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2890.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 564.7890625, "completions/mean_terminated_length": 615.2595825195312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.010903569869697094, "learning_rate": 2.7500000000000004e-06, "loss": 0.0657, "num_tokens": 2972997.0, "reward": 0.0605287104845047, "reward_std": 0.11411545425653458, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.02340117283165455, "rewards/format_reward_step": 0.078125, "step": 11 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.6554603688418865, "aux_distill/mean_u": 0.3278240132703401, "aux_distill/n_active_tok": 63.666666666666664, "calib/answer_extract_rate": 0.19921875, "calib/auroc": 0.5538461538461538, "calib/avg_num_step_conf": 0.74609375, "calib/ece": 0.5900000000000001, "calib/final_conf_rate": 0.16015625, "calib/format_rate": 0.12109375, "calib/frac_conf_gt_0.9": 0.6829268292682927, "calib/gap": -0.05848717948717963, "calib/mean_conf": 0.8797560975609756, "calib/mu_c": 0.8426666666666667, "calib/mu_w": 0.9011538461538463, "calib/nonempty_final_conf_rate": 0.16015625, "calib/nonempty_reasoning_rate": 0.23828125, "calib/nonempty_step_conf_rate": 0.1796875, "calib/pce": 0.5519512195121952, "calib/std_conf": 0.17420884305742093, "calib/step_conf_rate": 0.1796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 566.5, "completions/mean_terminated_length": 614.5084838867188, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0128, "grad_norm": 0.012244653888046741, "learning_rate": 3e-06, "loss": 0.0742, "num_tokens": 3222197.0, "reward": 0.12596894800662994, "reward_std": 0.22686685621738434, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.06443789601325989, "rewards/format_reward_step": 0.12109375, "step": 12 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.6795370479424795, "aux_distill/mean_u": 0.2659909197732691, "aux_distill/n_active_tok": 60.875, "calib/answer_extract_rate": 0.1875, "calib/auroc": 0.9396551724137931, "calib/avg_num_step_conf": 0.7265625, "calib/ece": 0.7548387096774192, "calib/final_conf_rate": 0.12109375, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.5483870967741935, "calib/gap": 0.18775862068965532, "calib/mean_conf": 0.8193548387096772, "calib/mu_c": 0.995, "calib/mu_w": 0.8072413793103447, "calib/nonempty_final_conf_rate": 0.12109375, "calib/nonempty_reasoning_rate": 0.234375, "calib/nonempty_step_conf_rate": 0.171875, "calib/pce": 0.7548387096774192, "calib/std_conf": 0.26929939549146925, "calib/step_conf_rate": 0.171875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 665.91796875, "completions/mean_terminated_length": 713.2844848632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.01278610061854124, "learning_rate": 3.2500000000000002e-06, "loss": 0.0775, "num_tokens": 3497264.0, "reward": 0.07057617604732513, "reward_std": 0.13284853100776672, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.03177734464406967, "rewards/format_reward_step": 0.1015625, "step": 13 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5881305737742062, "aux_distill/mean_u": 0.2588630859384443, "aux_distill/n_active_tok": 77.58620689655173, "calib/answer_extract_rate": 0.234375, "calib/auroc": 0.6453125, "calib/avg_num_step_conf": 1.10546875, "calib/ece": 0.6635190433333333, "calib/final_conf_rate": 0.1875, "calib/format_rate": 0.15625, "calib/frac_conf_gt_0.9": 0.5833333333333334, "calib/gap": 0.05577714799999989, "calib/mean_conf": 0.8022690433333333, "calib/mu_c": 0.8487499999999999, "calib/mu_w": 0.792972852, "calib/nonempty_final_conf_rate": 0.1875, "calib/nonempty_reasoning_rate": 0.28515625, "calib/nonempty_step_conf_rate": 0.22265625, "calib/pce": 0.64956071, "calib/std_conf": 0.24954569363040827, "calib/step_conf_rate": 0.22265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3034.0, "completions/max_terminated_length": 3034.0, "completions/mean_length": 621.8984375, "completions/mean_terminated_length": 655.168701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.013642070814967155, "learning_rate": 3.5e-06, "loss": 0.1353, "num_tokens": 3761870.0, "reward": 0.12654060125350952, "reward_std": 0.2307237684726715, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.06558118760585785, "rewards/format_reward_step": 0.15625, "step": 14 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5574666220566322, "aux_distill/mean_u": 0.33510179894522996, "aux_distill/n_active_tok": 96.48275862068965, "calib/answer_extract_rate": 0.28515625, "calib/auroc": 0.21071428571428574, "calib/avg_num_step_conf": 1.35546875, "calib/ece": 0.7250874316939889, "calib/final_conf_rate": 0.23828125, "calib/format_rate": 0.20703125, "calib/frac_conf_gt_0.9": 0.5737704918032787, "calib/gap": -0.28800595238095245, "calib/mean_conf": 0.746398907103825, "calib/mu_c": 0.48199999999999993, "calib/mu_w": 0.7700059523809524, "calib/nonempty_final_conf_rate": 0.23828125, "calib/nonempty_reasoning_rate": 0.34765625, "calib/nonempty_step_conf_rate": 0.27734375, "calib/pce": 0.69475956284153, "calib/std_conf": 0.3272540766327567, "calib/step_conf_rate": 0.27734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 550.6953125, "completions/mean_terminated_length": 589.8660888671875, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.016, "grad_norm": 0.01197971310466528, "learning_rate": 3.7500000000000005e-06, "loss": 0.1051, "num_tokens": 4010728.0, "reward": 0.1487540900707245, "reward_std": 0.23280642926692963, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.06703945249319077, "rewards/format_reward_step": 0.20703125, "step": 15 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5236153088766953, "aux_distill/mean_u": 0.40382603766356184, "aux_distill/n_active_tok": 92.96551724137932, "calib/answer_extract_rate": 0.25, "calib/auroc": 0.4451754385964912, "calib/avg_num_step_conf": 1.32421875, "calib/ece": 0.5738000000000001, "calib/final_conf_rate": 0.1953125, "calib/format_rate": 0.16796875, "calib/frac_conf_gt_0.9": 0.54, "calib/gap": -0.004473684210526407, "calib/mean_conf": 0.7534000000000001, "calib/mu_c": 0.75, "calib/mu_w": 0.7544736842105264, "calib/nonempty_final_conf_rate": 0.1953125, "calib/nonempty_reasoning_rate": 0.33984375, "calib/nonempty_step_conf_rate": 0.28125, "calib/pce": 0.5436000000000001, "calib/std_conf": 0.3135258203083121, "calib/step_conf_rate": 0.28125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3026.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 630.98828125, "completions/mean_terminated_length": 687.3744506835938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.01098259724676609, "learning_rate": 4.000000000000001e-06, "loss": 0.1117, "num_tokens": 4281109.0, "reward": 0.14574824273586273, "reward_std": 0.261897474527359, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.07665273547172546, "rewards/format_reward_step": 0.16796875, "step": 16 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5484382337139498, "aux_distill/mean_u": 0.31876979990318755, "aux_distill/n_active_tok": 132.38709677419354, "calib/answer_extract_rate": 0.3671875, "calib/auroc": 0.5265654648956357, "calib/avg_num_step_conf": 1.99609375, "calib/ece": 0.5748101265822785, "calib/final_conf_rate": 0.30859375, "calib/format_rate": 0.28125, "calib/frac_conf_gt_0.9": 0.43037974683544306, "calib/gap": 0.01680265654648949, "calib/mean_conf": 0.7115189873417722, "calib/mu_c": 0.7247058823529412, "calib/mu_w": 0.7079032258064517, "calib/nonempty_final_conf_rate": 0.30859375, "calib/nonempty_reasoning_rate": 0.48828125, "calib/nonempty_step_conf_rate": 0.421875, "calib/pce": 0.5355696202531646, "calib/std_conf": 0.3216456443441397, "calib/step_conf_rate": 0.421875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2972.0, "completions/max_terminated_length": 2972.0, "completions/mean_length": 609.50390625, "completions/mean_terminated_length": 629.165283203125, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.018133333333333335, "grad_norm": 0.011893536895513535, "learning_rate": 4.25e-06, "loss": 0.2036, "num_tokens": 4540670.0, "reward": 0.24156543612480164, "reward_std": 0.3197880983352661, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.13547460734844208, "rewards/format_reward_step": 0.28125, "step": 17 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5134281227665562, "aux_distill/mean_u": 0.2940568334667696, "aux_distill/n_active_tok": 126.51612903225806, "calib/answer_extract_rate": 0.375, "calib/auroc": 0.7677556818181819, "calib/avg_num_step_conf": 1.8828125, "calib/ece": 0.5039999999999999, "calib/final_conf_rate": 0.29296875, "calib/format_rate": 0.24609375, "calib/frac_conf_gt_0.9": 0.3466666666666667, "calib/gap": 0.2730113636363637, "calib/mean_conf": 0.6506666666666666, "calib/mu_c": 0.8836363636363637, "calib/mu_w": 0.610625, "calib/nonempty_final_conf_rate": 0.29296875, "calib/nonempty_reasoning_rate": 0.46484375, "calib/nonempty_step_conf_rate": 0.37890625, "calib/pce": 0.5039999999999999, "calib/std_conf": 0.32455234126750987, "calib/step_conf_rate": 0.37890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 2599.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 535.1015625, "completions/mean_terminated_length": 563.7283935546875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0192, "grad_norm": 0.012328559532761574, "learning_rate": 4.5e-06, "loss": 0.1924, "num_tokens": 4788376.0, "reward": 0.21568945050239563, "reward_std": 0.31785470247268677, "rewards/accuracy_reward_step": 0.04296875, "rewards/final_brier_reward_step": 0.14231640100479126, "rewards/format_reward_step": 0.24609375, "step": 18 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5287263132631779, "aux_distill/mean_u": 0.39094491986338004, "aux_distill/n_active_tok": 264.46875, "calib/answer_extract_rate": 0.67578125, "calib/auroc": 0.49503816793893135, "calib/avg_num_step_conf": 3.9140625, "calib/ece": 0.4305167770419426, "calib/final_conf_rate": 0.58984375, "calib/format_rate": 0.51953125, "calib/frac_conf_gt_0.9": 0.17880794701986755, "calib/gap": 0.004201272264630962, "calib/mean_conf": 0.5153551876379692, "calib/mu_c": 0.5189999999999999, "calib/mu_w": 0.5147987277353689, "calib/nonempty_final_conf_rate": 0.58984375, "calib/nonempty_reasoning_rate": 0.82421875, "calib/nonempty_step_conf_rate": 0.734375, "calib/pce": 0.40671081677704196, "calib/std_conf": 0.34326462305766253, "calib/step_conf_rate": 0.734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2774.0, "completions/max_terminated_length": 2774.0, "completions/mean_length": 455.41015625, "completions/mean_terminated_length": 464.4820861816406, "completions/min_length": 0.0, "completions/min_terminated_length": 13.0, "epoch": 0.020266666666666665, "grad_norm": 0.015135838650166988, "learning_rate": 4.75e-06, "loss": 0.2171, "num_tokens": 5009721.0, "reward": 0.4618377685546875, "reward_std": 0.4449642300605774, "rewards/accuracy_reward_step": 0.08203125, "rewards/final_brier_reward_step": 0.3221130669116974, "rewards/format_reward_step": 0.51953125, "step": 19 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5356345996260643, "aux_distill/mean_u": 0.3635441304973222, "aux_distill/n_active_tok": 273.34375, "calib/answer_extract_rate": 0.734375, "calib/auroc": 0.5462904911180774, "calib/avg_num_step_conf": 4.15234375, "calib/ece": 0.3078365168539326, "calib/final_conf_rate": 0.6953125, "calib/format_rate": 0.6171875, "calib/frac_conf_gt_0.9": 0.1348314606741573, "calib/gap": 0.03933297805642627, "calib/mean_conf": 0.46705, "calib/mu_c": 0.49909090909090903, "calib/mu_w": 0.45975793103448276, "calib/nonempty_final_conf_rate": 0.6953125, "calib/nonempty_reasoning_rate": 0.89453125, "calib/nonempty_step_conf_rate": 0.8046875, "calib/pce": 0.29474662921348316, "calib/std_conf": 0.32703286167044365, "calib/step_conf_rate": 0.8046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2788.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 405.89453125, "completions/mean_terminated_length": 412.3373107910156, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.021333333333333333, "grad_norm": 0.015294047072529793, "learning_rate": 5e-06, "loss": 0.1992, "num_tokens": 5218502.0, "reward": 0.586089015007019, "reward_std": 0.45785677433013916, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.4260842204093933, "rewards/format_reward_step": 0.6171875, "step": 20 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5090240705758333, "aux_distill/mean_u": 0.3329150960047672, "aux_distill/n_active_tok": 251.65625, "calib/answer_extract_rate": 0.78125, "calib/auroc": 0.44311613242249087, "calib/avg_num_step_conf": 3.89453125, "calib/ece": 0.3569728205128205, "calib/final_conf_rate": 0.76171875, "calib/format_rate": 0.66796875, "calib/frac_conf_gt_0.9": 0.14871794871794872, "calib/gap": -0.06790578034682071, "calib/mean_conf": 0.4042446153846154, "calib/mu_c": 0.3440000000000001, "calib/mu_w": 0.4119057803468208, "calib/nonempty_final_conf_rate": 0.76171875, "calib/nonempty_reasoning_rate": 0.9140625, "calib/nonempty_step_conf_rate": 0.859375, "calib/pce": 0.3241984615384615, "calib/std_conf": 0.3538810433323326, "calib/step_conf_rate": 0.859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2406.0, "completions/max_terminated_length": 2406.0, "completions/mean_length": 329.9296875, "completions/mean_terminated_length": 333.8419189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 12.0, "epoch": 0.0224, "grad_norm": 0.016006583347916603, "learning_rate": 4.9722222222222224e-06, "loss": 0.172, "num_tokens": 5405924.0, "reward": 0.6004676222801208, "reward_std": 0.45191651582717896, "rewards/accuracy_reward_step": 0.0859375, "rewards/final_brier_reward_step": 0.4470290243625641, "rewards/format_reward_step": 0.66796875, "step": 21 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5112969875335693, "aux_distill/mean_u": 0.3821755846262978, "aux_distill/n_active_tok": 295.46875, "calib/answer_extract_rate": 0.8515625, "calib/auroc": 0.619299571680524, "calib/avg_num_step_conf": 4.54296875, "calib/ece": 0.29235460317460316, "calib/final_conf_rate": 0.8203125, "calib/format_rate": 0.765625, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": 0.11863703703703693, "calib/mean_conf": 0.3673536507936508, "calib/mu_c": 0.47412698412698406, "calib/mu_w": 0.35548994708994713, "calib/nonempty_final_conf_rate": 0.8203125, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.279854126984127, "calib/std_conf": 0.31691713012233746, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2508.0, "completions/max_terminated_length": 2508.0, "completions/mean_length": 329.28125, "completions/mean_terminated_length": 329.28125, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.023466666666666667, "grad_norm": 0.013608487322926521, "learning_rate": 4.944444444444445e-06, "loss": 0.1555, "num_tokens": 5592036.0, "reward": 0.7152169346809387, "reward_std": 0.41016140580177307, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.5749650001525879, "rewards/format_reward_step": 0.765625, "step": 22 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5156875886023045, "aux_distill/mean_u": 0.31364435556478476, "aux_distill/n_active_tok": 292.28125, "calib/answer_extract_rate": 0.86328125, "calib/auroc": 0.5870047814207651, "calib/avg_num_step_conf": 4.46484375, "calib/ece": 0.2904984186046512, "calib/final_conf_rate": 0.83984375, "calib/format_rate": 0.7890625, "calib/frac_conf_gt_0.9": 0.13953488372093023, "calib/gap": 0.10023129781420764, "calib/mean_conf": 0.38249934883720926, "calib/mu_c": 0.46781249999999996, "calib/mu_w": 0.3675812021857923, "calib/nonempty_final_conf_rate": 0.83984375, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.26208027906976744, "calib/std_conf": 0.33951695417822614, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2779.0, "completions/max_terminated_length": 2779.0, "completions/mean_length": 342.59375, "completions/mean_terminated_length": 348.0317687988281, "completions/min_length": 0.0, "completions/min_terminated_length": 21.0, "epoch": 0.024533333333333334, "grad_norm": 0.012036532163619995, "learning_rate": 4.9166666666666665e-06, "loss": 0.2276, "num_tokens": 5783676.0, "reward": 0.7419977188110352, "reward_std": 0.4112620949745178, "rewards/accuracy_reward_step": 0.125, "rewards/final_brier_reward_step": 0.5699329376220703, "rewards/format_reward_step": 0.7890625, "step": 23 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5309049636125565, "aux_distill/mean_u": 0.38915524999249224, "aux_distill/n_active_tok": 273.8125, "calib/answer_extract_rate": 0.875, "calib/auroc": 0.560290404040404, "calib/avg_num_step_conf": 4.14453125, "calib/ece": 0.25765810810810813, "calib/final_conf_rate": 0.8671875, "calib/format_rate": 0.78125, "calib/frac_conf_gt_0.9": 0.06306306306306306, "calib/gap": 0.04373686868686877, "calib/mean_conf": 0.33765810810810815, "calib/mu_c": 0.3766666666666667, "calib/mu_w": 0.33292979797979794, "calib/nonempty_final_conf_rate": 0.8671875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.24360405405405405, "calib/std_conf": 0.3121383562224444, "calib/step_conf_rate": 0.91796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2976.0, "completions/max_terminated_length": 2976.0, "completions/mean_length": 271.21875, "completions/mean_terminated_length": 271.21875, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "epoch": 0.0256, "grad_norm": 0.011990250088274479, "learning_rate": 4.888888888888889e-06, "loss": 0.1158, "num_tokens": 5957620.0, "reward": 0.7375925183296204, "reward_std": 0.40519455075263977, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.6001850366592407, "rewards/format_reward_step": 0.78125, "step": 24 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49400834552943707, "aux_distill/mean_u": 0.33734021095642364, "aux_distill/n_active_tok": 262.9375, "calib/answer_extract_rate": 0.92578125, "calib/auroc": 0.6188197767145135, "calib/avg_num_step_conf": 4.078125, "calib/ece": 0.2676514644351464, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.87109375, "calib/frac_conf_gt_0.9": 0.09205020920502092, "calib/gap": 0.15417878787878786, "calib/mean_conf": 0.3525075313807531, "calib/mu_c": 0.48733333333333334, "calib/mu_w": 0.3331545454545455, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.24731799163179916, "calib/std_conf": 0.3220192977821595, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1409.0, "completions/max_terminated_length": 1409.0, "completions/mean_length": 241.72265625, "completions/mean_terminated_length": 242.6706085205078, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.02666666666666667, "grad_norm": 0.01171167753636837, "learning_rate": 4.861111111111111e-06, "loss": 0.024, "num_tokens": 6122725.0, "reward": 0.8318536281585693, "reward_std": 0.3171781897544861, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.6715196967124939, "rewards/format_reward_step": 0.87109375, "step": 25 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5315526509657502, "aux_distill/mean_u": 0.4173970698103785, "aux_distill/n_active_tok": 267.8125, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.4739819004524887, "calib/avg_num_step_conf": 4.06640625, "calib/ece": 0.2542253112033195, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.04564315352697095, "calib/gap": -0.04812986425339366, "calib/mean_conf": 0.29713568464730294, "calib/mu_c": 0.253, "calib/mu_w": 0.30112986425339366, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.23418672199170124, "calib/std_conf": 0.2775307865477138, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2361.0, "completions/max_terminated_length": 2361.0, "completions/mean_length": 244.53125, "completions/mean_terminated_length": 244.53125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "epoch": 0.027733333333333332, "grad_norm": 0.012768019922077656, "learning_rate": 4.833333333333333e-06, "loss": 0.171, "num_tokens": 6290565.0, "reward": 0.8395206332206726, "reward_std": 0.28634095191955566, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.70638507604599, "rewards/format_reward_step": 0.89453125, "step": 26 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5077687716111541, "aux_distill/mean_u": 0.36229705136659734, "aux_distill/n_active_tok": 250.0625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48710407239819004, "calib/avg_num_step_conf": 3.8828125, "calib/ece": 0.1784360995850622, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.029045643153526972, "calib/gap": -0.0015764705882352625, "calib/mean_conf": 0.22994564315352697, "calib/mu_c": 0.2285, "calib/mu_w": 0.23007647058823527, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.16269709543568464, "calib/std_conf": 0.2365592422526031, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1575.0, "completions/max_terminated_length": 1575.0, "completions/mean_length": 214.62109375, "completions/mean_terminated_length": 215.46275329589844, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.0288, "grad_norm": 0.011952613480389118, "learning_rate": 4.805555555555556e-06, "loss": 0.0934, "num_tokens": 6450724.0, "reward": 0.9002429842948914, "reward_std": 0.21553806960582733, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.7887672185897827, "rewards/format_reward_step": 0.93359375, "step": 27 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4802441941574216, "aux_distill/mean_u": 0.33220112003744817, "aux_distill/n_active_tok": 234.8125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.47613561553653716, "calib/avg_num_step_conf": 3.67578125, "calib/ece": 0.18265755102040815, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.012244897959183673, "calib/gap": -0.00416255760368664, "calib/mean_conf": 0.22349755102040816, "calib/mu_c": 0.21981071428571428, "calib/mu_w": 0.22397327188940092, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.14593469387755104, "calib/std_conf": 0.23264285704224408, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2264.0, "completions/max_terminated_length": 2264.0, "completions/mean_length": 211.421875, "completions/mean_terminated_length": 212.25099182128906, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.029866666666666666, "grad_norm": 0.011538458988070488, "learning_rate": 4.777777777777778e-06, "loss": 0.0221, "num_tokens": 6611792.0, "reward": 0.9147476553916931, "reward_std": 0.2037634551525116, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.7787140607833862, "rewards/format_reward_step": 0.9375, "step": 28 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4926240276545286, "aux_distill/mean_u": 0.36958355035024315, "aux_distill/n_active_tok": 216.5, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5128104575163399, "calib/avg_num_step_conf": 3.359375, "calib/ece": 0.20314049586776856, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.01652892561983471, "calib/gap": 0.034379084967320256, "calib/mean_conf": 0.24685950413223137, "calib/mu_c": 0.2788235294117647, "calib/mu_w": 0.24444444444444444, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96484375, "calib/pce": 0.18987603305785122, "calib/std_conf": 0.23114709658767324, "calib/step_conf_rate": 0.96484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2303.0, "completions/max_terminated_length": 2303.0, "completions/mean_length": 199.52734375, "completions/mean_terminated_length": 200.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 28.0, "epoch": 0.030933333333333334, "grad_norm": 0.012967920862138271, "learning_rate": 4.75e-06, "loss": 0.127, "num_tokens": 6769999.0, "reward": 0.8791476488113403, "reward_std": 0.2470008134841919, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.7778265476226807, "rewards/format_reward_step": 0.90625, "step": 29 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49175314232707024, "aux_distill/mean_u": 0.32133843266442685, "aux_distill/n_active_tok": 209.53125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5404340716515534, "calib/avg_num_step_conf": 3.2578125, "calib/ece": 0.13880524193548385, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.012096774193548387, "calib/gap": 0.02508617511520736, "calib/mean_conf": 0.21417862903225804, "calib/mu_c": 0.2361290322580645, "calib/mu_w": 0.21104285714285714, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.11399193548387095, "calib/std_conf": 0.2115425749942957, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2050.0, "completions/max_terminated_length": 2050.0, "completions/mean_length": 174.0546875, "completions/mean_terminated_length": 174.0546875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.032, "grad_norm": 0.012511853128671646, "learning_rate": 4.722222222222222e-06, "loss": 0.0732, "num_tokens": 6921541.0, "reward": 0.9234327077865601, "reward_std": 0.19281959533691406, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.7882716655731201, "rewards/format_reward_step": 0.9375, "step": 30 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4863882064819336, "aux_distill/mean_u": 0.3262798401698808, "aux_distill/n_active_tok": 212.71875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5903232012312427, "calib/avg_num_step_conf": 3.33984375, "calib/ece": 0.15530120481927714, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.012048192771084338, "calib/gap": 0.0482185455944594, "calib/mean_conf": 0.22188755020080322, "calib/mu_c": 0.26565217391304347, "calib/mu_w": 0.21743362831858407, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1424096385542169, "calib/std_conf": 0.2197334351794591, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 656.0, "completions/max_terminated_length": 656.0, "completions/mean_length": 163.23828125, "completions/mean_terminated_length": 163.87844848632812, "completions/min_length": 0.0, "completions/min_terminated_length": 10.0, "epoch": 0.03306666666666667, "grad_norm": 0.010997715406119823, "learning_rate": 4.694444444444445e-06, "loss": 0.0457, "num_tokens": 7069242.0, "reward": 0.9402660131454468, "reward_std": 0.17586375772953033, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.8258445262908936, "rewards/format_reward_step": 0.9609375, "step": 31 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5132203064858913, "aux_distill/mean_u": 0.34264686314469306, "aux_distill/n_active_tok": 211.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5187995860641601, "calib/avg_num_step_conf": 3.265625, "calib/ece": 0.13620522088353412, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.008032128514056224, "calib/gap": 0.047089030700241424, "calib/mean_conf": 0.18590481927710845, "calib/mu_c": 0.2280769230769231, "calib/mu_w": 0.18098789237668167, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.10884618473895584, "calib/std_conf": 0.1932826076047128, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 517.0, "completions/max_terminated_length": 517.0, "completions/mean_length": 158.86328125, "completions/mean_terminated_length": 159.4862823486328, "completions/min_length": 0.0, "completions/min_terminated_length": 20.0, "epoch": 0.034133333333333335, "grad_norm": 0.012896922416985035, "learning_rate": 4.666666666666667e-06, "loss": -0.0091, "num_tokens": 7216615.0, "reward": 0.949363112449646, "reward_std": 0.14953142404556274, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.836226224899292, "rewards/format_reward_step": 0.9609375, "step": 32 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48916500341147184, "aux_distill/mean_u": 0.33687785008226384, "aux_distill/n_active_tok": 225.0625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5212639783124364, "calib/avg_num_step_conf": 3.51953125, "calib/ece": 0.13667549407114624, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": 0.006728464927143352, "calib/mean_conf": 0.1870399209486166, "calib/mu_c": 0.1930769230769231, "calib/mu_w": 0.18634845814977974, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11047430830039523, "calib/std_conf": 0.19247799942857202, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 526.0, "completions/max_terminated_length": 526.0, "completions/mean_length": 171.6015625, "completions/mean_terminated_length": 172.27452087402344, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.0352, "grad_norm": 0.010750737972557545, "learning_rate": 4.638888888888889e-06, "loss": 0.0778, "num_tokens": 7367417.0, "reward": 0.9722968339920044, "reward_std": 0.10676688700914383, "rewards/accuracy_reward_step": 0.1015625, "rewards/final_brier_reward_step": 0.8547499179840088, "rewards/format_reward_step": 0.98828125, "step": 33 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47535041719675064, "aux_distill/mean_u": 0.33305644552452596, "aux_distill/n_active_tok": 255.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4020833333333334, "calib/avg_num_step_conf": 3.99609375, "calib/ece": 0.16385826771653544, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.07352898550724639, "calib/mean_conf": 0.175748031496063, "calib/mu_c": 0.10916666666666668, "calib/mu_w": 0.18269565217391306, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.12255905511811026, "calib/std_conf": 0.18389067140982998, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 526.0, "completions/max_terminated_length": 526.0, "completions/mean_length": 177.60546875, "completions/mean_terminated_length": 178.30197143554688, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.03626666666666667, "grad_norm": 0.011679654009640217, "learning_rate": 4.611111111111112e-06, "loss": 0.0003, "num_tokens": 7517996.0, "reward": 0.962560772895813, "reward_std": 0.10435040295124054, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.8469964861869812, "rewards/format_reward_step": 0.984375, "step": 34 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48178594931960106, "aux_distill/mean_u": 0.3530250464581981, "aux_distill/n_active_tok": 261.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.6274481566820276, "calib/avg_num_step_conf": 4.09765625, "calib/ece": 0.06537254901960785, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.06063940092165898, "calib/mean_conf": 0.14576470588235294, "calib/mu_c": 0.19903225806451613, "calib/mu_w": 0.13839285714285715, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04478431372549019, "calib/std_conf": 0.15192394746434412, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 512.0, "completions/max_terminated_length": 512.0, "completions/mean_length": 186.82421875, "completions/mean_terminated_length": 187.55686950683594, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.037333333333333336, "grad_norm": 0.010054091922938824, "learning_rate": 4.583333333333333e-06, "loss": 0.0217, "num_tokens": 7675079.0, "reward": 0.9981177449226379, "reward_std": 0.07535213232040405, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8790480494499207, "rewards/format_reward_step": 0.99609375, "step": 35 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49229648895561695, "aux_distill/mean_u": 0.33450523767788776, "aux_distill/n_active_tok": 244.6875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5527711084433774, "calib/avg_num_step_conf": 3.82421875, "calib/ece": 0.11885375494071149, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01883253301320527, "calib/mean_conf": 0.11379446640316206, "calib/mu_c": 0.12897959183673469, "calib/mu_w": 0.11014705882352942, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019486166007905134, "calib/std_conf": 0.13063477301985285, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 458.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 173.2734375, "completions/mean_terminated_length": 173.9529571533203, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.0384, "grad_norm": 0.010825212113559246, "learning_rate": 4.555555555555556e-06, "loss": 0.0231, "num_tokens": 7822149.0, "reward": 0.998137354850769, "reward_std": 0.0790884867310524, "rewards/accuracy_reward_step": 0.19140625, "rewards/final_brier_reward_step": 0.8165871500968933, "rewards/format_reward_step": 0.98828125, "step": 36 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4659074544906616, "aux_distill/mean_u": 0.2862848587118983, "aux_distill/n_active_tok": 283.40625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.3690749235474007, "calib/avg_num_step_conf": 4.421875, "calib/ece": 0.13330708661417323, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.04467125382262997, "calib/mean_conf": 0.08417322834645669, "calib/mu_c": 0.04583333333333334, "calib/mu_w": 0.09050458715596331, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03787401574803151, "calib/std_conf": 0.10079558283724063, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 820.0, "completions/max_terminated_length": 820.0, "completions/mean_length": 203.38671875, "completions/mean_terminated_length": 204.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.039466666666666664, "grad_norm": 0.010696339420974255, "learning_rate": 4.527777777777778e-06, "loss": 0.0656, "num_tokens": 7981312.0, "reward": 0.9861996173858643, "reward_std": 0.04991428181529045, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8434929847717285, "rewards/format_reward_step": 0.98828125, "step": 37 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5006300732493401, "aux_distill/mean_u": 0.36540829187686324, "aux_distill/n_active_tok": 330.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.40192603095828905, "calib/avg_num_step_conf": 5.15625, "calib/ece": 0.135975390625, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0076003426680846115, "calib/mean_conf": 0.062852734375, "calib/mu_c": 0.056410256410256404, "calib/mu_w": 0.06401059907834102, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.023242187499999997, "calib/std_conf": 0.08558604812204028, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 678.0, "completions/max_terminated_length": 678.0, "completions/mean_length": 222.60546875, "completions/mean_terminated_length": 223.4784393310547, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.04053333333333333, "grad_norm": 0.0095800356939435, "learning_rate": 4.5e-06, "loss": 0.0517, "num_tokens": 8145187.0, "reward": 0.999052882194519, "reward_std": 0.03454170748591423, "rewards/accuracy_reward_step": 0.15234375, "rewards/final_brier_reward_step": 0.8496682643890381, "rewards/format_reward_step": 0.99609375, "step": 38 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47253161668777466, "aux_distill/mean_u": 0.31778438508438045, "aux_distill/n_active_tok": 330.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.41043385680063876, "calib/avg_num_step_conf": 5.1796875, "calib/ece": 0.12454901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.022895927601809953, "calib/mean_conf": 0.04925490196078431, "calib/mu_c": 0.029411764705882353, "calib/mu_w": 0.052307692307692305, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.020235294117647056, "calib/std_conf": 0.08329862252874574, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 604.0, "completions/max_terminated_length": 604.0, "completions/mean_length": 241.74609375, "completions/mean_terminated_length": 242.6941375732422, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.0416, "grad_norm": 0.010325453244149685, "learning_rate": 4.472222222222223e-06, "loss": 0.0637, "num_tokens": 8313162.0, "reward": 0.9914298057556152, "reward_std": 0.03904122859239578, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.85785973072052, "rewards/format_reward_step": 0.9921875, "step": 39 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46040870528668165, "aux_distill/mean_u": 0.2964483882269135, "aux_distill/n_active_tok": 388.0625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4761849976990336, "calib/avg_num_step_conf": 6.0546875, "calib/ece": 0.14114624505928852, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0032581684307409105, "calib/mean_conf": 0.0283399209486166, "calib/mu_c": 0.025609756097560978, "calib/mu_w": 0.028867924528301888, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003715415019762845, "calib/std_conf": 0.05019734835438953, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1015.0, "completions/max_terminated_length": 1015.0, "completions/mean_length": 264.578125, "completions/mean_terminated_length": 265.6156921386719, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.042666666666666665, "grad_norm": 0.009926322847604752, "learning_rate": 4.444444444444444e-06, "loss": 0.0494, "num_tokens": 8487654.0, "reward": 0.9869291186332703, "reward_std": 0.05567903444170952, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.8293269872665405, "rewards/format_reward_step": 0.984375, "step": 40 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4719663066789508, "aux_distill/mean_u": 0.29660021391646385, "aux_distill/n_active_tok": 391.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5099252013808976, "calib/avg_num_step_conf": 6.125, "calib/ece": 0.29356862745098045, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00031933256616800687, "calib/mean_conf": 0.01623529411764706, "calib/mu_c": 0.016455696202531647, "calib/mu_w": 0.01613636363636364, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02471689763775105, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 868.0, "completions/max_terminated_length": 868.0, "completions/mean_length": 269.08203125, "completions/mean_terminated_length": 270.13726806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.04373333333333333, "grad_norm": 0.008122802712023258, "learning_rate": 4.416666666666667e-06, "loss": 0.0428, "num_tokens": 8663787.0, "reward": 1.0007362365722656, "reward_std": 0.019822387024760246, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.6967852115631104, "rewards/format_reward_step": 0.99609375, "step": 41 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4728801045566797, "aux_distill/mean_u": 0.306885805886724, "aux_distill/n_active_tok": 396.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5105218855218855, "calib/avg_num_step_conf": 6.22265625, "calib/ece": 0.21075396825396825, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0065151515151515155, "calib/mean_conf": 0.014563492063492063, "calib/mu_c": 0.009444444444444446, "calib/mu_w": 0.015959595959595962, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005515873015873016, "calib/std_conf": 0.04327156697463605, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 660.0, "completions/max_terminated_length": 660.0, "completions/mean_length": 263.90625, "completions/mean_terminated_length": 264.9411926269531, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.0448, "grad_norm": 0.009029773995280266, "learning_rate": 4.388888888888889e-06, "loss": 0.039, "num_tokens": 8835715.0, "reward": 0.9872943162918091, "reward_std": 0.04495030641555786, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.7753698825836182, "rewards/format_reward_step": 0.984375, "step": 42 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.44131849333643913, "aux_distill/mean_u": 0.2632251547550202, "aux_distill/n_active_tok": 413.8125, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4897545246515498, "calib/avg_num_step_conf": 6.4609375, "calib/ece": 0.17211764705882354, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0036966923236946133, "calib/mean_conf": 0.00827450980392157, "calib/mu_c": 0.011304347826086957, "calib/mu_w": 0.0076076555023923435, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.014258232477665623, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 777.0, "completions/max_terminated_length": 777.0, "completions/mean_length": 283.2265625, "completions/mean_terminated_length": 284.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.04586666666666667, "grad_norm": 0.006555310450494289, "learning_rate": 4.361111111111112e-06, "loss": 0.0454, "num_tokens": 9013445.0, "reward": 0.9940841197967529, "reward_std": 0.026495356112718582, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8162933588027954, "rewards/format_reward_step": 0.9921875, "step": 43 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46258422918617725, "aux_distill/mean_u": 0.31472563813177606, "aux_distill/n_active_tok": 493.59375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.437609756097561, "calib/avg_num_step_conf": 7.73828125, "calib/ece": 0.19019607843137254, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0028390243902439026, "calib/mean_conf": 0.005882352941176471, "calib/mu_c": 0.0036000000000000003, "calib/mu_w": 0.006439024390243903, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010285430508159473, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1352.0, "completions/max_terminated_length": 1352.0, "completions/mean_length": 324.62109375, "completions/mean_terminated_length": 325.8941345214844, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.046933333333333334, "grad_norm": 0.006308534182608128, "learning_rate": 4.333333333333334e-06, "loss": 0.0601, "num_tokens": 9202868.0, "reward": 0.9967269897460938, "reward_std": 0.012606674805283546, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.802047610282898, "rewards/format_reward_step": 0.99609375, "step": 44 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4415023336187005, "aux_distill/mean_u": 0.2417768633845245, "aux_distill/n_active_tok": 490.8125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5660124888492417, "calib/avg_num_step_conf": 7.5859375, "calib/ece": 0.2319277108433735, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0020838537020517398, "calib/mean_conf": 0.0050200803212851405, "calib/mu_c": 0.006610169491525424, "calib/mu_w": 0.004526315789473684, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009188979692257087, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2734.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 352.97265625, "completions/mean_terminated_length": 352.97265625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.048, "grad_norm": 0.006134659983217716, "learning_rate": 4.305555555555556e-06, "loss": 0.1236, "num_tokens": 9398277.0, "reward": 0.9702200889587402, "reward_std": 0.0761524960398674, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7412214875221252, "rewards/format_reward_step": 0.96875, "step": 45 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46812527999281883, "aux_distill/mean_u": 0.3197349763021953, "aux_distill/n_active_tok": 522.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49359292328042326, "calib/avg_num_step_conf": 8.1171875, "calib/ece": 0.24723320158102768, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021585648148148145, "calib/mean_conf": 0.0057312252964426885, "calib/mu_c": 0.00734375, "calib/mu_w": 0.005185185185185185, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.013743805405934633, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2256.0, "completions/max_terminated_length": 2256.0, "completions/mean_length": 351.23046875, "completions/mean_terminated_length": 351.23046875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.04906666666666667, "grad_norm": 0.00599120045080781, "learning_rate": 4.277777777777778e-06, "loss": 0.1249, "num_tokens": 9592960.0, "reward": 0.9900075793266296, "reward_std": 0.03566751629114151, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.741733968257904, "rewards/format_reward_step": 0.98828125, "step": 46 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.45625958032906055, "aux_distill/mean_u": 0.2927239472315959, "aux_distill/n_active_tok": 530.875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5433224248091009, "calib/avg_num_step_conf": 8.4140625, "calib/ece": 0.26509803921568625, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0008181393174380557, "calib/mean_conf": 0.005490196078431373, "calib/mu_c": 0.006086956521739131, "calib/mu_w": 0.0052688172043010755, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00988437576558473, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 985.0, "completions/max_terminated_length": 985.0, "completions/mean_length": 341.44921875, "completions/mean_terminated_length": 342.78826904296875, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.050133333333333335, "grad_norm": 0.006031285040080547, "learning_rate": 4.25e-06, "loss": 0.0569, "num_tokens": 9786347.0, "reward": 0.9996237754821777, "reward_std": 0.008969930931925774, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.729716420173645, "rewards/format_reward_step": 0.99609375, "step": 47 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4554118439555168, "aux_distill/mean_u": 0.29516383089519194, "aux_distill/n_active_tok": 497.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5036963434022258, "calib/avg_num_step_conf": 7.796875, "calib/ece": 0.26541501976284587, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 3.1001589825119854e-05, "calib/mean_conf": 0.0033596837944664037, "calib/mu_c": 0.0033823529411764713, "calib/mu_w": 0.0033513513513513515, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00630084653786219, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 853.0, "completions/max_terminated_length": 853.0, "completions/mean_length": 321.41796875, "completions/mean_terminated_length": 322.6784362792969, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.0512, "grad_norm": 0.006986531894654036, "learning_rate": 4.222222222222223e-06, "loss": 0.0418, "num_tokens": 9972318.0, "reward": 0.9872015118598938, "reward_std": 0.04042886197566986, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7204968333244324, "rewards/format_reward_step": 0.984375, "step": 48 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.45771760307252407, "aux_distill/mean_u": 0.30600149759672407, "aux_distill/n_active_tok": 529.34375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4506413192853871, "calib/avg_num_step_conf": 8.265625, "calib/ece": 0.29179282868525896, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0026523133302794317, "calib/mean_conf": 0.004302788844621514, "calib/mu_c": 0.0024324324324324323, "calib/mu_w": 0.005084745762711864, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0006374501992031873, "calib/std_conf": 0.01255315202204136, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2488.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 358.4375, "completions/mean_terminated_length": 358.4375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.05226666666666667, "grad_norm": 0.005493524018675089, "learning_rate": 4.194444444444445e-06, "loss": 0.0846, "num_tokens": 10168614.0, "reward": 0.9810855388641357, "reward_std": 0.05687147378921509, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.6926398277282715, "rewards/format_reward_step": 0.98046875, "step": 49 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.44665439892560244, "aux_distill/mean_u": 0.29054208955618344, "aux_distill/n_active_tok": 519.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.45397571484528004, "calib/avg_num_step_conf": 8.08203125, "calib/ece": 0.2678346456692914, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0022585193889541715, "calib/mean_conf": 0.0038188976377952757, "calib/mu_c": 0.0021739130434782613, "calib/mu_w": 0.004432432432432433, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008693302085341467, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1978.0, "completions/max_terminated_length": 1978.0, "completions/mean_length": 361.5, "completions/mean_terminated_length": 361.5, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.05333333333333334, "grad_norm": 0.00480527663603425, "learning_rate": 4.166666666666667e-06, "loss": 0.0762, "num_tokens": 10366518.0, "reward": 0.9927287697792053, "reward_std": 0.023400694131851196, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7237386703491211, "rewards/format_reward_step": 0.9921875, "step": 50 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4320847960188985, "aux_distill/mean_u": 0.25169377117910835, "aux_distill/n_active_tok": 547.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.572090909090909, "calib/avg_num_step_conf": 8.68359375, "calib/ece": 0.21168627450980393, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0025500000000000006, "calib/mean_conf": 0.004, "calib/mu_c": 0.006000000000000001, "calib/mu_w": 0.0034500000000000004, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.00833607797938763, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1333.0, "completions/max_terminated_length": 1333.0, "completions/mean_length": 357.3046875, "completions/mean_terminated_length": 358.7059020996094, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.0544, "grad_norm": 0.005411915946751833, "learning_rate": 4.138888888888889e-06, "loss": 0.0307, "num_tokens": 10567284.0, "reward": 0.9856216907501221, "reward_std": 0.046699557453393936, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.7720246315002441, "rewards/format_reward_step": 0.984375, "step": 51 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46485963091254234, "aux_distill/mean_u": 0.29594562104785566, "aux_distill/n_active_tok": 458.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4904846659364732, "calib/avg_num_step_conf": 7.25, "calib/ece": 0.3437007874015748, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -4.381161007667054e-05, "calib/mean_conf": 0.002755905511811024, "calib/mu_c": 0.0027272727272727275, "calib/mu_w": 0.002771084337349398, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0057749343459240635, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1609.0, "completions/max_terminated_length": 1609.0, "completions/mean_length": 325.39453125, "completions/mean_terminated_length": 326.67059326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.055466666666666664, "grad_norm": 0.0054336413741111755, "learning_rate": 4.111111111111111e-06, "loss": 0.0414, "num_tokens": 10758537.0, "reward": 0.9891984462738037, "reward_std": 0.03528884798288345, "rewards/accuracy_reward_step": 0.34375, "rewards/final_brier_reward_step": 0.6463655829429626, "rewards/format_reward_step": 0.98828125, "step": 52 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.44012278597801924, "aux_distill/mean_u": 0.2862151821432428, "aux_distill/n_active_tok": 500.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5097378277153558, "calib/avg_num_step_conf": 8.2109375, "calib/ece": 0.2937944664031621, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005947565543071168, "calib/mean_conf": 0.002648221343873518, "calib/mu_c": 0.003066666666666667, "calib/mu_w": 0.0024719101123595504, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006136037429351788, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2396.0, "completions/max_terminated_length": 2396.0, "completions/mean_length": 344.328125, "completions/mean_terminated_length": 345.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.05653333333333333, "grad_norm": 0.005466809030622244, "learning_rate": 4.083333333333334e-06, "loss": 0.0771, "num_tokens": 10952509.0, "reward": 0.9852514863014221, "reward_std": 0.03851320222020149, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.6931593418121338, "rewards/format_reward_step": 0.984375, "step": 53 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43958625476807356, "aux_distill/mean_u": 0.2571615242030315, "aux_distill/n_active_tok": 463.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4883675464320626, "calib/avg_num_step_conf": 7.29296875, "calib/ece": 0.38732283464566925, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00035842293906810036, "calib/mean_conf": 0.0024409448818897644, "calib/mu_c": 0.0022222222222222222, "calib/mu_w": 0.0025806451612903226, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0058480140559455576, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 939.0, "completions/max_terminated_length": 939.0, "completions/mean_length": 311.4921875, "completions/mean_terminated_length": 312.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.0576, "grad_norm": 0.005956499837338924, "learning_rate": 4.055555555555556e-06, "loss": 0.0511, "num_tokens": 11138483.0, "reward": 0.9930269718170166, "reward_std": 0.023992538452148438, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6071476340293884, "rewards/format_reward_step": 0.9921875, "step": 54 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4438884211704135, "aux_distill/mean_u": 0.3088732302722088, "aux_distill/n_active_tok": 503.1875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5007692307692309, "calib/avg_num_step_conf": 7.84765625, "calib/ece": 0.23266666666666666, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006025641025641029, "calib/mean_conf": 0.002627450980392157, "calib/mu_c": 0.0021666666666666666, "calib/mu_w": 0.0027692307692307695, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006051934658466017, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1874.0, "completions/max_terminated_length": 1874.0, "completions/mean_length": 349.70703125, "completions/mean_terminated_length": 349.70703125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.058666666666666666, "grad_norm": 0.004821429029107094, "learning_rate": 4.027777777777779e-06, "loss": 0.0885, "num_tokens": 11335832.0, "reward": 0.9965798258781433, "reward_std": 0.012168581597507, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.7626910209655762, "rewards/format_reward_step": 0.99609375, "step": 55 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43843644205480814, "aux_distill/mean_u": 0.2908842595016219, "aux_distill/n_active_tok": 492.84375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4863632452895122, "calib/avg_num_step_conf": 7.72265625, "calib/ece": 0.2266015625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001141701798158824, "calib/mean_conf": 0.0038671875, "calib/mu_c": 0.004745762711864407, "calib/mu_w": 0.0036040609137055835, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007972483041050872, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 333.1328125, "completions/mean_terminated_length": 334.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.05973333333333333, "grad_norm": 0.0058794510550796986, "learning_rate": 4.000000000000001e-06, "loss": 0.0311, "num_tokens": 11527954.0, "reward": 1.0010545253753662, "reward_std": 0.0027934759855270386, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7716401815414429, "rewards/format_reward_step": 1.0, "step": 56 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.448484574444592, "aux_distill/mean_u": 0.2557524902818454, "aux_distill/n_active_tok": 464.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5290054249547921, "calib/avg_num_step_conf": 7.2578125, "calib/ece": 0.3068503937007874, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007406871609403256, "calib/mean_conf": 0.004173228346456693, "calib/mu_c": 0.0046835443037974685, "calib/mu_w": 0.003942857142857143, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007202465116264535, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1941.0, "completions/max_terminated_length": 1941.0, "completions/mean_length": 322.2421875, "completions/mean_terminated_length": 322.2421875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.0608, "grad_norm": 0.005439405329525471, "learning_rate": 3.972222222222223e-06, "loss": 0.0748, "num_tokens": 11717240.0, "reward": 0.9935984015464783, "reward_std": 0.024633333086967468, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.6864155530929565, "rewards/format_reward_step": 0.9921875, "step": 57 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43026210833340883, "aux_distill/mean_u": 0.27150247398639, "aux_distill/n_active_tok": 540.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4831393867323537, "calib/avg_num_step_conf": 8.44921875, "calib/ece": 0.23622047244094488, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006506412978849914, "calib/mean_conf": 0.003937007874015748, "calib/mu_c": 0.0034426229508196723, "calib/mu_w": 0.004093264248704664, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008050771989682681, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2337.0, "completions/max_terminated_length": 2337.0, "completions/mean_length": 370.8203125, "completions/mean_terminated_length": 370.8203125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.06186666666666667, "grad_norm": 0.004779992159456015, "learning_rate": 3.944444444444445e-06, "loss": 0.0792, "num_tokens": 11918490.0, "reward": 0.992967963218689, "reward_std": 0.024048078805208206, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.7554671764373779, "rewards/format_reward_step": 0.9921875, "step": 58 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4273897521197796, "aux_distill/mean_u": 0.24913954184704293, "aux_distill/n_active_tok": 500.46875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4096424010217114, "calib/avg_num_step_conf": 7.79296875, "calib/ece": 0.31388235294117645, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017177522349936139, "calib/mean_conf": 0.003764705882352941, "calib/mu_c": 0.002592592592592593, "calib/mu_w": 0.004310344827586207, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.006381923722804189, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1845.0, "completions/max_terminated_length": 1845.0, "completions/mean_length": 332.90625, "completions/mean_terminated_length": 332.90625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.06293333333333333, "grad_norm": 0.004225445911288261, "learning_rate": 3.916666666666667e-06, "loss": 0.0848, "num_tokens": 12109962.0, "reward": 0.9968867301940918, "reward_std": 0.013021206483244896, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6812734603881836, "rewards/format_reward_step": 0.99609375, "step": 59 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43960097804665565, "aux_distill/mean_u": 0.261367273698288, "aux_distill/n_active_tok": 483.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5204790249433107, "calib/avg_num_step_conf": 7.546875, "calib/ece": 0.3288888888888889, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006547619047619054, "calib/mean_conf": 0.0044444444444444444, "calib/mu_c": 0.004880952380952382, "calib/mu_w": 0.004226190476190476, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007133150370990895, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2427.0, "completions/max_terminated_length": 2427.0, "completions/mean_length": 340.19921875, "completions/mean_terminated_length": 340.19921875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.064, "grad_norm": 0.006468915846198797, "learning_rate": 3.88888888888889e-06, "loss": 0.1286, "num_tokens": 12305909.0, "reward": 0.9859417676925659, "reward_std": 0.04724940285086632, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.6593835353851318, "rewards/format_reward_step": 0.984375, "step": 60 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43225711584091187, "aux_distill/mean_u": 0.24751454538576817, "aux_distill/n_active_tok": 421.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.519921875, "calib/avg_num_step_conf": 6.578125, "calib/ece": 0.3690234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00043750000000000126, "calib/mean_conf": 0.0059765625, "calib/mu_c": 0.006250000000000001, "calib/mu_w": 0.0058125, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.00942815335490433, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 738.0, "completions/max_terminated_length": 738.0, "completions/mean_length": 286.140625, "completions/mean_terminated_length": 287.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.06506666666666666, "grad_norm": 0.005420437548309565, "learning_rate": 3.861111111111112e-06, "loss": 0.0414, "num_tokens": 12483225.0, "reward": 0.9983751773834229, "reward_std": 0.014877854846417904, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6256566047668457, "rewards/format_reward_step": 0.99609375, "step": 61 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4199416805058718, "aux_distill/mean_u": 0.25439553840276957, "aux_distill/n_active_tok": 476.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5237889273356402, "calib/avg_num_step_conf": 7.44921875, "calib/ece": 0.2592549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001323529411764706, "calib/mean_conf": 0.007411764705882353, "calib/mu_c": 0.008382352941176471, "calib/mu_w": 0.007058823529411765, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011494545118187515, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 337.1015625, "completions/mean_terminated_length": 337.1015625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.06613333333333334, "grad_norm": 0.005449065938591957, "learning_rate": 3.833333333333334e-06, "loss": 0.0391, "num_tokens": 12676603.0, "reward": 0.9982271194458008, "reward_std": 0.01538827084004879, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7347354888916016, "rewards/format_reward_step": 0.99609375, "step": 62 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.436340908985585, "aux_distill/mean_u": 0.28511982269740827, "aux_distill/n_active_tok": 459.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5146272412708399, "calib/avg_num_step_conf": 7.21875, "calib/ece": 0.2592549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00012032085561497399, "calib/mean_conf": 0.007411764705882354, "calib/mu_c": 0.0075, "calib/mu_w": 0.007379679144385026, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009998385106053397, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1058.0, "completions/max_terminated_length": 1058.0, "completions/mean_length": 320.92578125, "completions/mean_terminated_length": 322.184326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.0672, "grad_norm": 0.0054280711337924, "learning_rate": 3.8055555555555556e-06, "loss": 0.0384, "num_tokens": 12867400.0, "reward": 0.9980087280273438, "reward_std": 0.015086276456713676, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.734298825263977, "rewards/format_reward_step": 0.99609375, "step": 63 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4061308619566262, "aux_distill/mean_u": 0.2416082616255521, "aux_distill/n_active_tok": 445.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.47493517718236816, "calib/avg_num_step_conf": 7.0546875, "calib/ece": 0.2969921875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0011106309420916153, "calib/mean_conf": 0.0076953125, "calib/mu_c": 0.006923076923076923, "calib/mu_w": 0.008033707865168539, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008736649273453969, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1143.0, "completions/max_terminated_length": 1143.0, "completions/mean_length": 292.5625, "completions/mean_terminated_length": 293.7098083496094, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.06826666666666667, "grad_norm": 0.006516458000987768, "learning_rate": 3.777777777777778e-06, "loss": 0.0512, "num_tokens": 13046072.0, "reward": 1.0020415782928467, "reward_std": 0.003972196485847235, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.6993956565856934, "rewards/format_reward_step": 1.0, "step": 64 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4346964657306671, "aux_distill/mean_u": 0.25160164239136495, "aux_distill/n_active_tok": 407.65625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5213645278789577, "calib/avg_num_step_conf": 6.49609375, "calib/ece": 0.3174901960784314, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006430372653404327, "calib/mean_conf": 0.008, "calib/mu_c": 0.008433734939759036, "calib/mu_w": 0.0077906976744186035, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.008466774740702857, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 789.0, "completions/max_terminated_length": 789.0, "completions/mean_length": 281.6328125, "completions/mean_terminated_length": 282.7372741699219, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.06933333333333333, "grad_norm": 0.005870714783668518, "learning_rate": 3.7500000000000005e-06, "loss": 0.0537, "num_tokens": 13223194.0, "reward": 0.9948544502258301, "reward_std": 0.02620820514857769, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.6733027696609497, "rewards/format_reward_step": 0.9921875, "step": 65 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43573713721707463, "aux_distill/mean_u": 0.27839431637130474, "aux_distill/n_active_tok": 472.9375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5154248892468788, "calib/avg_num_step_conf": 7.40625, "calib/ece": 0.245, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000641159887233185, "calib/mean_conf": 0.008906250000000001, "calib/mu_c": 0.009384615384615384, "calib/mu_w": 0.008743455497382199, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.009579794409980833, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 887.0, "completions/max_terminated_length": 887.0, "completions/mean_length": 312.73046875, "completions/mean_terminated_length": 313.9568786621094, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.0704, "grad_norm": 0.005807508714497089, "learning_rate": 3.7222222222222225e-06, "loss": 0.0313, "num_tokens": 13409605.0, "reward": 1.0022971630096436, "reward_std": 0.0038538267835974693, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7506882548332214, "rewards/format_reward_step": 1.0, "step": 66 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43293993175029755, "aux_distill/mean_u": 0.2693137654714224, "aux_distill/n_active_tok": 454.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6072888763569716, "calib/avg_num_step_conf": 7.09375, "calib/ece": 0.3119607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002733681094036374, "calib/mean_conf": 0.011803921568627451, "calib/mu_c": 0.013658536585365855, "calib/mu_w": 0.010924855491329481, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0010980392156862747, "calib/std_conf": 0.019620936804645387, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2418.0, "completions/max_terminated_length": 2418.0, "completions/mean_length": 318.67578125, "completions/mean_terminated_length": 318.67578125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.07146666666666666, "grad_norm": 0.006566639989614487, "learning_rate": 3.694444444444445e-06, "loss": 0.0716, "num_tokens": 13596194.0, "reward": 1.000207543373108, "reward_std": 0.01748921349644661, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6840089559555054, "rewards/format_reward_step": 0.99609375, "step": 67 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42459358740597963, "aux_distill/mean_u": 0.25818829225068923, "aux_distill/n_active_tok": 435.625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5195084485407067, "calib/avg_num_step_conf": 6.80078125, "calib/ece": 0.2613671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002964669738863262, "calib/mean_conf": 0.0120703125, "calib/mu_c": 0.012285714285714285, "calib/mu_w": 0.011989247311827959, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01231150604728535, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1065.0, "completions/max_terminated_length": 1065.0, "completions/mean_length": 299.2109375, "completions/mean_terminated_length": 300.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.07253333333333334, "grad_norm": 0.006065750028938055, "learning_rate": 3.6666666666666666e-06, "loss": 0.0443, "num_tokens": 13776880.0, "reward": 1.0032107830047607, "reward_std": 0.005062174052000046, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7329839468002319, "rewards/format_reward_step": 1.0, "step": 68 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43279402423650026, "aux_distill/mean_u": 0.27462993316679674, "aux_distill/n_active_tok": 432.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5322975964579381, "calib/avg_num_step_conf": 6.91015625, "calib/ece": 0.25610236220472443, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004063883617963348, "calib/mean_conf": 0.011614173228346457, "calib/mu_c": 0.011911764705882354, "calib/mu_w": 0.01150537634408602, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.013107398430191524, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1351.0, "completions/max_terminated_length": 1351.0, "completions/mean_length": 316.20703125, "completions/mean_terminated_length": 317.44708251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.0736, "grad_norm": 0.006692867260426283, "learning_rate": 3.638888888888889e-06, "loss": 0.0452, "num_tokens": 13962325.0, "reward": 0.99129319190979, "reward_std": 0.03864618018269539, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7286800742149353, "rewards/format_reward_step": 0.98828125, "step": 69 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43750183191150427, "aux_distill/mean_u": 0.25953609603320626, "aux_distill/n_active_tok": 443.5625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5386486486486486, "calib/avg_num_step_conf": 6.94921875, "calib/ece": 0.26250980392156864, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009845559845559869, "calib/mean_conf": 0.012000000000000002, "calib/mu_c": 0.012714285714285716, "calib/mu_w": 0.01172972972972973, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.010267023166715966, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 721.0, "completions/max_terminated_length": 721.0, "completions/mean_length": 315.14453125, "completions/mean_terminated_length": 316.3804016113281, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.07466666666666667, "grad_norm": 0.006321345455944538, "learning_rate": 3.6111111111111115e-06, "loss": 0.0588, "num_tokens": 14149994.0, "reward": 0.9994460344314575, "reward_std": 0.015884429216384888, "rewards/accuracy_reward_step": 0.2734375, "rewards/final_brier_reward_step": 0.7293609380722046, "rewards/format_reward_step": 0.99609375, "step": 70 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42223017755895853, "aux_distill/mean_u": 0.2762904479705797, "aux_distill/n_active_tok": 491.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.46249290981281904, "calib/avg_num_step_conf": 7.67578125, "calib/ece": 0.3091338582677165, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015031196823596157, "calib/mean_conf": 0.013700787401574804, "calib/mu_c": 0.012682926829268294, "calib/mu_w": 0.01418604651162791, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.012536758526055635, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2448.0, "completions/max_terminated_length": 2448.0, "completions/mean_length": 364.0234375, "completions/mean_terminated_length": 364.0234375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.07573333333333333, "grad_norm": 0.005532794166356325, "learning_rate": 3.5833333333333335e-06, "loss": 0.0521, "num_tokens": 14347592.0, "reward": 0.9960788488388062, "reward_std": 0.028619343414902687, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6796578168869019, "rewards/format_reward_step": 0.9921875, "step": 71 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4384244615212083, "aux_distill/mean_u": 0.2782737296333412, "aux_distill/n_active_tok": 492.34375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.542580170157068, "calib/avg_num_step_conf": 7.7109375, "calib/ece": 0.23709803921568628, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014921465968586387, "calib/mean_conf": 0.01388235294117647, "calib/mu_c": 0.015, "calib/mu_w": 0.01350785340314136, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.011956862102007072, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 897.0, "completions/max_terminated_length": 897.0, "completions/mean_length": 330.2734375, "completions/mean_terminated_length": 331.5686340332031, "completions/min_length": 0.0, "completions/min_terminated_length": 139.0, "epoch": 0.0768, "grad_norm": 0.006522673182189465, "learning_rate": 3.555555555555556e-06, "loss": 0.0565, "num_tokens": 14536550.0, "reward": 1.0016295909881592, "reward_std": 0.01137695461511612, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7532593607902527, "rewards/format_reward_step": 0.99609375, "step": 72 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4227779684588313, "aux_distill/mean_u": 0.21715057211677818, "aux_distill/n_active_tok": 431.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4812515915457091, "calib/avg_num_step_conf": 6.8203125, "calib/ece": 0.38316406249999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001501145912910621, "calib/mean_conf": 0.0152734375, "calib/mu_c": 0.016176470588235296, "calib/mu_w": 0.014675324675324675, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.020289011359196234, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 979.0, "completions/max_terminated_length": 979.0, "completions/mean_length": 326.24609375, "completions/mean_terminated_length": 327.5255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.07786666666666667, "grad_norm": 0.00622026901692152, "learning_rate": 3.5277777777777784e-06, "loss": 0.0268, "num_tokens": 14727101.0, "reward": 1.0022165775299072, "reward_std": 0.020840127021074295, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6099019646644592, "rewards/format_reward_step": 0.99609375, "step": 73 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4327737260609865, "aux_distill/mean_u": 0.24665208091788085, "aux_distill/n_active_tok": 452.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4544215425531915, "calib/avg_num_step_conf": 7.203125, "calib/ece": 0.3535433070866142, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0033670212765957407, "calib/mean_conf": 0.01850393700787402, "calib/mu_c": 0.01638297872340426, "calib/mu_w": 0.01975, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.000984251968503937, "calib/std_conf": 0.019665659751870696, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1317.0, "completions/max_terminated_length": 1317.0, "completions/mean_length": 333.16796875, "completions/mean_terminated_length": 334.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.07893333333333333, "grad_norm": 0.006490938365459442, "learning_rate": 3.5e-06, "loss": 0.0469, "num_tokens": 14916320.0, "reward": 0.9997944235801697, "reward_std": 0.02420826628804207, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6363078355789185, "rewards/format_reward_step": 0.9921875, "step": 74 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4080852912738919, "aux_distill/mean_u": 0.2462691897632438, "aux_distill/n_active_tok": 426.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4701713299642548, "calib/avg_num_step_conf": 6.6640625, "calib/ece": 0.5009803921568627, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001543818562800444, "calib/mean_conf": 0.02058823529411765, "calib/mu_c": 0.019849624060150377, "calib/mu_w": 0.02139344262295082, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.014820728026610377, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1676.0, "completions/max_terminated_length": 1676.0, "completions/mean_length": 334.79296875, "completions/mean_terminated_length": 334.79296875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.08, "grad_norm": 0.006999581586569548, "learning_rate": 3.4722222222222224e-06, "loss": 0.0716, "num_tokens": 15106779.0, "reward": 1.006085753440857, "reward_std": 0.022710826247930527, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.49654650688171387, "rewards/format_reward_step": 0.99609375, "step": 75 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4355225879698992, "aux_distill/mean_u": 0.2736861558638344, "aux_distill/n_active_tok": 451.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5060080106809078, "calib/avg_num_step_conf": 7.0546875, "calib/ece": 0.40169291338582674, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0007482993197278874, "calib/mean_conf": 0.019566929133858272, "calib/mu_c": 0.019999999999999997, "calib/mu_w": 0.01925170068027211, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.015376170617330107, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 341.36328125, "completions/mean_terminated_length": 342.7019958496094, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.08106666666666666, "grad_norm": 0.006661584135144949, "learning_rate": 3.444444444444445e-06, "loss": 0.0017, "num_tokens": 15297224.0, "reward": 1.000239610671997, "reward_std": 0.03131163865327835, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5903230905532837, "rewards/format_reward_step": 0.9921875, "step": 76 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4306439831852913, "aux_distill/mean_u": 0.27393377477924047, "aux_distill/n_active_tok": 436.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5247457206648474, "calib/avg_num_step_conf": 6.8515625, "calib/ece": 0.4325882352941176, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0014487720168692604, "calib/mean_conf": 0.022313725490196085, "calib/mu_c": 0.023103448275862068, "calib/mu_w": 0.021654676258992808, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.01670210764682748, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 962.0, "completions/max_terminated_length": 962.0, "completions/mean_length": 335.9609375, "completions/mean_terminated_length": 337.2784423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.08213333333333334, "grad_norm": 0.007040133234113455, "learning_rate": 3.416666666666667e-06, "loss": 0.0235, "num_tokens": 15487894.0, "reward": 1.0061755180358887, "reward_std": 0.024019623175263405, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5631324052810669, "rewards/format_reward_step": 0.99609375, "step": 77 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4400795288383961, "aux_distill/mean_u": 0.27258292125392275, "aux_distill/n_active_tok": 472.9375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49562637631514556, "calib/avg_num_step_conf": 7.4375, "calib/ece": 0.44999999999999996, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.006491313922192314, "calib/mean_conf": 0.027421875, "calib/mu_c": 0.03081967213114754, "calib/mu_w": 0.024328358208955226, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0004296875, "calib/std_conf": 0.06321152601768426, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 973.0, "completions/max_terminated_length": 973.0, "completions/mean_length": 385.5, "completions/mean_terminated_length": 387.01177978515625, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.0832, "grad_norm": 0.006053322460502386, "learning_rate": 3.3888888888888893e-06, "loss": 0.0424, "num_tokens": 15694606.0, "reward": 1.0123136043548584, "reward_std": 0.016614407300949097, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5480648279190063, "rewards/format_reward_step": 1.0, "step": 78 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41587979532778263, "aux_distill/mean_u": 0.26884553844139497, "aux_distill/n_active_tok": 484.9375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49537037037037035, "calib/avg_num_step_conf": 7.57421875, "calib/ece": 0.3965490196078431, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.826152683295453e-05, "calib/mean_conf": 0.026980392156862747, "calib/mu_c": 0.027037037037037037, "calib/mu_w": 0.026938775510204082, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02005628374084471, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1201.0, "completions/max_terminated_length": 1201.0, "completions/mean_length": 411.1953125, "completions/mean_terminated_length": 412.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.08426666666666667, "grad_norm": 0.0055940039455890656, "learning_rate": 3.3611111111111117e-06, "loss": 0.0385, "num_tokens": 15906248.0, "reward": 1.006937026977539, "reward_std": 0.024199217557907104, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5959054231643677, "rewards/format_reward_step": 0.99609375, "step": 79 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43790998961776495, "aux_distill/mean_u": 0.28386136577758153, "aux_distill/n_active_tok": 444.625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5211038961038962, "calib/avg_num_step_conf": 6.9375, "calib/ece": 0.41011725490196077, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0009577172827172874, "calib/mean_conf": 0.03098078431372549, "calib/mu_c": 0.031517857142857146, "calib/mu_w": 0.03056013986013986, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009411764705882352, "calib/std_conf": 0.019306680327472275, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1641.0, "completions/max_terminated_length": 1641.0, "completions/mean_length": 368.2890625, "completions/mean_terminated_length": 368.2890625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.08533333333333333, "grad_norm": 0.0062352754175662994, "learning_rate": 3.3333333333333333e-06, "loss": 0.0641, "num_tokens": 16102690.0, "reward": 1.0092190504074097, "reward_std": 0.025584885850548744, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5848445296287537, "rewards/format_reward_step": 0.99609375, "step": 80 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4371841801330447, "aux_distill/mean_u": 0.27223928922651247, "aux_distill/n_active_tok": 460.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4884761904761904, "calib/avg_num_step_conf": 7.1953125, "calib/ece": 0.375843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001161904761904753, "calib/mean_conf": 0.035921568627450974, "calib/mu_c": 0.03523809523809524, "calib/mu_w": 0.036399999999999995, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.02466707191287799, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2562.0, "completions/max_terminated_length": 2562.0, "completions/mean_length": 391.625, "completions/mean_terminated_length": 391.625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.0864, "grad_norm": 0.006331518292427063, "learning_rate": 3.3055555555555558e-06, "loss": 0.044, "num_tokens": 16309194.0, "reward": 1.00960111618042, "reward_std": 0.0268442090600729, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6129523515701294, "rewards/format_reward_step": 0.99609375, "step": 81 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.419726787135005, "aux_distill/mean_u": 0.23885014784892913, "aux_distill/n_active_tok": 417.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5527279616064663, "calib/avg_num_step_conf": 6.51953125, "calib/ece": 0.542470588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0009939378630967471, "calib/mean_conf": 0.04325490196078432, "calib/mu_c": 0.04283783783783784, "calib/mu_w": 0.043831775700934585, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.002666666666666667, "calib/std_conf": 0.03865373844246665, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2239.0, "completions/max_terminated_length": 2239.0, "completions/mean_length": 370.8671875, "completions/mean_terminated_length": 370.8671875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.08746666666666666, "grad_norm": 0.006565708201378584, "learning_rate": 3.277777777777778e-06, "loss": 0.0597, "num_tokens": 16509688.0, "reward": 1.0191833972930908, "reward_std": 0.033703260123729706, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.46414804458618164, "rewards/format_reward_step": 0.99609375, "step": 82 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40507653122767806, "aux_distill/mean_u": 0.2709985755049188, "aux_distill/n_active_tok": 485.53125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5766917293233083, "calib/avg_num_step_conf": 7.5859375, "calib/ece": 0.43225296442687744, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009247493734335852, "calib/mean_conf": 0.04205533596837945, "calib/mu_c": 0.046916666666666676, "calib/mu_w": 0.037669172932330824, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.028723241563493374, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2696.0, "completions/max_terminated_length": 2696.0, "completions/mean_length": 466.9375, "completions/mean_terminated_length": 466.9375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.08853333333333334, "grad_norm": 0.005869552958756685, "learning_rate": 3.2500000000000002e-06, "loss": 0.1236, "num_tokens": 16736488.0, "reward": 1.0050857067108154, "reward_std": 0.06311136484146118, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5570465326309204, "rewards/format_reward_step": 0.984375, "step": 83 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4285242296755314, "aux_distill/mean_u": 0.26300995633903906, "aux_distill/n_active_tok": 421.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5220199092741934, "calib/avg_num_step_conf": 6.58203125, "calib/ece": 0.44035714285714284, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003162802419354821, "calib/mean_conf": 0.062103174603174605, "calib/mu_c": 0.06370967741935483, "calib/mu_w": 0.060546875000000014, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005198412698412698, "calib/std_conf": 0.07047979890304709, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 384.6875, "completions/mean_terminated_length": 386.19610595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.0896, "grad_norm": 0.006847344804555178, "learning_rate": 3.2222222222222227e-06, "loss": 0.0825, "num_tokens": 16940888.0, "reward": 1.0128443241119385, "reward_std": 0.06419685482978821, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.553032398223877, "rewards/format_reward_step": 0.984375, "step": 84 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4118081103079021, "aux_distill/mean_u": 0.2785398073769991, "aux_distill/n_active_tok": 480.65625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5516667798221196, "calib/avg_num_step_conf": 7.5546875, "calib/ece": 0.3671138211382114, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0040650406504065045, "calib/gap": 0.006341910516667791, "calib/mean_conf": 0.062235772357723575, "calib/mu_c": 0.06592233009708738, "calib/mu_w": 0.05958041958041959, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00532520325203252, "calib/std_conf": 0.0744811408771018, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 458.8828125, "completions/mean_terminated_length": 460.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.09066666666666667, "grad_norm": 0.006029889453202486, "learning_rate": 3.1944444444444443e-06, "loss": 0.0813, "num_tokens": 17166186.0, "reward": 0.9829345345497131, "reward_std": 0.10405939817428589, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.602587878704071, "rewards/format_reward_step": 0.9609375, "step": 85 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42284269724041224, "aux_distill/mean_u": 0.25688785203974734, "aux_distill/n_active_tok": 382.25, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5004802151363811, "calib/avg_num_step_conf": 5.98046875, "calib/ece": 0.3923107569721116, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004747726981687794, "calib/mean_conf": 0.070398406374502, "calib/mu_c": 0.06780701754385966, "calib/mu_w": 0.07255474452554746, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004262948207171314, "calib/std_conf": 0.0637087546243611, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2434.0, "completions/max_terminated_length": 2434.0, "completions/mean_length": 442.07421875, "completions/mean_terminated_length": 443.807861328125, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.09173333333333333, "grad_norm": 0.00629810057580471, "learning_rate": 3.1666666666666667e-06, "loss": 0.074, "num_tokens": 17384869.0, "reward": 1.002341628074646, "reward_std": 0.0852438285946846, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5828081965446472, "rewards/format_reward_step": 0.9765625, "step": 86 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40320085268467665, "aux_distill/mean_u": 0.2343219956904517, "aux_distill/n_active_tok": 333.71875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5096732026143791, "calib/avg_num_step_conf": 5.1953125, "calib/ece": 0.5114682539682538, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.010615686274509803, "calib/mean_conf": 0.08376984126984129, "calib/mu_c": 0.08806666666666665, "calib/mu_w": 0.07745098039215685, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.08096439181050301, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2678.0, "completions/max_terminated_length": 2678.0, "completions/mean_length": 393.45703125, "completions/mean_terminated_length": 393.45703125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.0928, "grad_norm": 0.00688812555745244, "learning_rate": 3.138888888888889e-06, "loss": 0.1493, "num_tokens": 17591090.0, "reward": 1.0292962789535522, "reward_std": 0.09010984003543854, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.4882800579071045, "rewards/format_reward_step": 0.984375, "step": 87 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43424535216763616, "aux_distill/mean_u": 0.2367017103593896, "aux_distill/n_active_tok": 406.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48049410726665404, "calib/avg_num_step_conf": 6.3359375, "calib/ece": 0.43170634920634926, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.013497825675931177, "calib/mean_conf": 0.09162698412698411, "calib/mu_c": 0.08503875968992249, "calib/mu_w": 0.09853658536585366, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005714285714285713, "calib/std_conf": 0.08985510683449129, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2639.0, "completions/max_terminated_length": 2639.0, "completions/mean_length": 462.8828125, "completions/mean_terminated_length": 462.8828125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.09386666666666667, "grad_norm": 0.005841149017214775, "learning_rate": 3.1111111111111116e-06, "loss": 0.123, "num_tokens": 17819436.0, "reward": 1.017090082168579, "reward_std": 0.08964027464389801, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5498050451278687, "rewards/format_reward_step": 0.98046875, "step": 88 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4475083462893963, "aux_distill/mean_u": 0.2913353557635836, "aux_distill/n_active_tok": 400.65625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5194451646357272, "calib/avg_num_step_conf": 6.203125, "calib/ece": 0.3772690763052209, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005897718434016069, "calib/mean_conf": 0.09710843373493976, "calib/mu_c": 0.10025862068965517, "calib/mu_w": 0.0943609022556391, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.004257028112449799, "calib/std_conf": 0.08784154391005794, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2504.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 466.37109375, "completions/mean_terminated_length": 468.2000427246094, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.09493333333333333, "grad_norm": 0.005623057018965483, "learning_rate": 3.0833333333333336e-06, "loss": 0.1245, "num_tokens": 18047715.0, "reward": 0.998233437538147, "reward_std": 0.1062500923871994, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5824043154716492, "rewards/format_reward_step": 0.9609375, "step": 89 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43033392354846, "aux_distill/mean_u": 0.2852346709679915, "aux_distill/n_active_tok": 404.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.572209723784527, "calib/avg_num_step_conf": 6.3203125, "calib/ece": 0.39778656126482215, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.013782652168478945, "calib/mean_conf": 0.1165217391304348, "calib/mu_c": 0.12338582677165355, "calib/mu_w": 0.1096031746031746, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00616600790513834, "calib/std_conf": 0.11560357384745286, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2565.0, "completions/max_terminated_length": 2565.0, "completions/mean_length": 415.140625, "completions/mean_terminated_length": 415.140625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.096, "grad_norm": 0.006214203778654337, "learning_rate": 3.055555555555556e-06, "loss": 0.0922, "num_tokens": 18257311.0, "reward": 1.0322825908660889, "reward_std": 0.0971316397190094, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5840964913368225, "rewards/format_reward_step": 0.984375, "step": 90 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4307767665013671, "aux_distill/mean_u": 0.23621513864912877, "aux_distill/n_active_tok": 395.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5367861803268502, "calib/avg_num_step_conf": 6.18359375, "calib/ece": 0.4246850393700787, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.004238488783943381, "calib/mean_conf": 0.12059055118110239, "calib/mu_c": 0.11857142857142855, "calib/mu_w": 0.12280991735537193, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.010826771653543307, "calib/std_conf": 0.13331939355593167, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1697.0, "completions/max_terminated_length": 1697.0, "completions/mean_length": 429.49609375, "completions/mean_terminated_length": 431.180419921875, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.09706666666666666, "grad_norm": 0.005459485575556755, "learning_rate": 3.0277777777777776e-06, "loss": 0.0263, "num_tokens": 18474974.0, "reward": 1.027876853942871, "reward_std": 0.09287009388208389, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5557538866996765, "rewards/format_reward_step": 0.98046875, "step": 91 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41957229003310204, "aux_distill/mean_u": 0.22008513333165414, "aux_distill/n_active_tok": 414.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4638769230769231, "calib/avg_num_step_conf": 6.47265625, "calib/ece": 0.3938039215686274, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.021046153846153853, "calib/mean_conf": 0.14447058823529413, "calib/mu_c": 0.13415384615384615, "calib/mu_w": 0.1552, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01423529411764706, "calib/std_conf": 0.14214468642916672, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1868.0, "completions/max_terminated_length": 1868.0, "completions/mean_length": 435.60546875, "completions/mean_terminated_length": 435.60546875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.09813333333333334, "grad_norm": 0.006024438422173262, "learning_rate": 3e-06, "loss": 0.0915, "num_tokens": 18693209.0, "reward": 1.0437605381011963, "reward_std": 0.09087259322404861, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5836148262023926, "rewards/format_reward_step": 0.99609375, "step": 92 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4368822705000639, "aux_distill/mean_u": 0.2506925324287287, "aux_distill/n_active_tok": 436.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4765416401780038, "calib/avg_num_step_conf": 6.8125, "calib/ece": 0.3680876494023905, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": -0.017501589319771166, "calib/mean_conf": 0.16848605577689246, "calib/mu_c": 0.15942148760330577, "calib/mu_w": 0.17692307692307693, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.027250996015936255, "calib/std_conf": 0.18404296117631527, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2461.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 466.32421875, "completions/mean_terminated_length": 466.32421875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.0992, "grad_norm": 0.005379728972911835, "learning_rate": 2.9722222222222225e-06, "loss": 0.0783, "num_tokens": 18918364.0, "reward": 1.0252985954284668, "reward_std": 0.13064992427825928, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5974722504615784, "rewards/format_reward_step": 0.98046875, "step": 93 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3870116211473942, "aux_distill/mean_u": 0.1967927027645488, "aux_distill/n_active_tok": 396.5, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5232878467550772, "calib/avg_num_step_conf": 6.1953125, "calib/ece": 0.359, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.044, "calib/gap": 0.02757063232750337, "calib/mean_conf": 0.19340000000000002, "calib/mu_c": 0.20674418604651165, "calib/mu_w": 0.17917355371900828, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.018200000000000004, "calib/std_conf": 0.21385799026456784, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2775.0, "completions/max_terminated_length": 2775.0, "completions/mean_length": 412.4296875, "completions/mean_terminated_length": 414.0470886230469, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.10026666666666667, "grad_norm": 0.006159038282930851, "learning_rate": 2.944444444444445e-06, "loss": 0.0775, "num_tokens": 19132626.0, "reward": 1.038222074508667, "reward_std": 0.15837010741233826, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.595975399017334, "rewards/format_reward_step": 0.97265625, "step": 94 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4291246635839343, "aux_distill/mean_u": 0.2764642272026086, "aux_distill/n_active_tok": 399.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5379575379575381, "calib/avg_num_step_conf": 6.2421875, "calib/ece": 0.4132228346456693, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.027195577395577375, "calib/mean_conf": 0.1688244094488189, "calib/mu_c": 0.1807090909090909, "calib/mu_w": 0.15351351351351353, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.009527559055118111, "calib/std_conf": 0.17708300167695082, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2584.0, "completions/max_terminated_length": 2584.0, "completions/mean_length": 466.75390625, "completions/mean_terminated_length": 466.75390625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.10133333333333333, "grad_norm": 0.006041058339178562, "learning_rate": 2.916666666666667e-06, "loss": 0.1034, "num_tokens": 19358243.0, "reward": 1.054192066192627, "reward_std": 0.1343843936920166, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.5693216323852539, "rewards/format_reward_step": 0.98046875, "step": 95 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41332088224589825, "aux_distill/mean_u": 0.24489968288977096, "aux_distill/n_active_tok": 421.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5761417859577369, "calib/avg_num_step_conf": 6.6875, "calib/ece": 0.4851758893280632, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02771336059986365, "calib/mean_conf": 0.16707707509881423, "calib/mu_c": 0.17693558282208588, "calib/mu_w": 0.14922222222222223, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00399209486166008, "calib/std_conf": 0.14269122454238534, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1696.0, "completions/max_terminated_length": 1696.0, "completions/mean_length": 417.02734375, "completions/mean_terminated_length": 420.31103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 108.0, "epoch": 0.1024, "grad_norm": 0.0056716748513281345, "learning_rate": 2.888888888888889e-06, "loss": 0.0577, "num_tokens": 19570818.0, "reward": 1.0770845413208008, "reward_std": 0.12300469726324081, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.5291690826416016, "rewards/format_reward_step": 0.98828125, "step": 96 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.425160467159003, "aux_distill/mean_u": 0.2345694086333077, "aux_distill/n_active_tok": 426.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5501902801172873, "calib/avg_num_step_conf": 6.66796875, "calib/ece": 0.31923976377952756, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.015748031496062992, "calib/gap": 0.023136022209744866, "calib/mean_conf": 0.18965787401574805, "calib/mu_c": 0.20213675213675217, "calib/mu_w": 0.1790007299270073, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.024133858267716538, "calib/std_conf": 0.18904196800273304, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1863.0, "completions/max_terminated_length": 1863.0, "completions/mean_length": 436.94921875, "completions/mean_terminated_length": 436.94921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.10346666666666667, "grad_norm": 0.005866437219083309, "learning_rate": 2.861111111111111e-06, "loss": 0.0759, "num_tokens": 19787749.0, "reward": 1.0457253456115723, "reward_std": 0.13500013947486877, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6461380124092102, "rewards/format_reward_step": 0.98828125, "step": 97 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4224176909774542, "aux_distill/mean_u": 0.30284410290391983, "aux_distill/n_active_tok": 406.28125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5889187632033801, "calib/avg_num_step_conf": 6.34765625, "calib/ece": 0.31840120000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": 0.05760982651558799, "calib/mean_conf": 0.2200788, "calib/mu_c": 0.2484228346456693, "calib/mu_w": 0.1908130081300813, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.015240000000000007, "calib/std_conf": 0.22231795732814746, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 483.60546875, "completions/mean_terminated_length": 485.5019836425781, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.10453333333333334, "grad_norm": 0.005700993817299604, "learning_rate": 2.8333333333333335e-06, "loss": 0.0761, "num_tokens": 20017736.0, "reward": 1.0433740615844727, "reward_std": 0.18059629201889038, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6258106231689453, "rewards/format_reward_step": 0.96484375, "step": 98 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40645847423002124, "aux_distill/mean_u": 0.23056967960458863, "aux_distill/n_active_tok": 440.0, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5040017785682525, "calib/avg_num_step_conf": 6.9453125, "calib/ece": 0.20314741035856573, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": -0.0025900400177856775, "calib/mean_conf": 0.2260159362549801, "calib/mu_c": 0.2242307692307692, "calib/mu_w": 0.22682080924855488, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05920318725099602, "calib/std_conf": 0.21963403041367904, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2446.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 502.109375, "completions/mean_terminated_length": 506.06298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.1056, "grad_norm": 0.005459653213620186, "learning_rate": 2.805555555555556e-06, "loss": 0.0616, "num_tokens": 20252076.0, "reward": 0.9931430816650391, "reward_std": 0.16258558630943298, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.7089425921440125, "rewards/format_reward_step": 0.97265625, "step": 99 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4357182774692774, "aux_distill/mean_u": 0.28317105445502583, "aux_distill/n_active_tok": 429.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5066358024691358, "calib/avg_num_step_conf": 6.76953125, "calib/ece": 0.2976078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": 0.011833333333333335, "calib/mean_conf": 0.2115686274509804, "calib/mu_c": 0.21783333333333335, "calib/mu_w": 0.20600000000000002, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.019294117647058823, "calib/std_conf": 0.19430895702583822, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1653.0, "completions/max_terminated_length": 1653.0, "completions/mean_length": 476.0078125, "completions/mean_terminated_length": 477.8745422363281, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.10666666666666667, "grad_norm": 0.005962701514363289, "learning_rate": 2.7777777777777783e-06, "loss": 0.0681, "num_tokens": 20481342.0, "reward": 1.0551525354385376, "reward_std": 0.1266723871231079, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6493675708770752, "rewards/format_reward_step": 0.9921875, "step": 100 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41964778350666165, "aux_distill/mean_u": 0.2572269155690119, "aux_distill/n_active_tok": 507.96875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5264010366051183, "calib/avg_num_step_conf": 7.9375, "calib/ece": 0.25271706349206347, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.03571428571428571, "calib/gap": 0.024962040816326525, "calib/mean_conf": 0.22148928571428572, "calib/mu_c": 0.23605047619047623, "calib/mu_w": 0.2110884353741497, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.028769841269841265, "calib/std_conf": 0.20629327983683568, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2700.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 539.640625, "completions/mean_terminated_length": 539.640625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.10773333333333333, "grad_norm": 0.00484135327860713, "learning_rate": 2.7500000000000004e-06, "loss": 0.1312, "num_tokens": 20726482.0, "reward": 1.0217359066009521, "reward_std": 0.17179188132286072, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6645654439926147, "rewards/format_reward_step": 0.96875, "step": 101 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.412255329079926, "aux_distill/mean_u": 0.2408152103888565, "aux_distill/n_active_tok": 433.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5201820464532328, "calib/avg_num_step_conf": 6.72265625, "calib/ece": 0.335296442687747, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": 0.009948524795982377, "calib/mean_conf": 0.25395256916996045, "calib/mu_c": 0.2585925925925926, "calib/mu_w": 0.24864406779661022, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.027826086956521737, "calib/std_conf": 0.2319248160071721, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2744.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 433.71875, "completions/mean_terminated_length": 433.71875, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.1088, "grad_norm": 0.005836244206875563, "learning_rate": 2.7222222222222224e-06, "loss": 0.0772, "num_tokens": 20944210.0, "reward": 1.0662009716033936, "reward_std": 0.15195344388484955, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6167769432067871, "rewards/format_reward_step": 0.98828125, "step": 102 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.44190127309411764, "aux_distill/mean_u": 0.28445447424545817, "aux_distill/n_active_tok": 472.09375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5412144702842377, "calib/avg_num_step_conf": 7.375, "calib/ece": 0.3301204819277109, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.05622489959839357, "calib/gap": 0.0429496124031008, "calib/mean_conf": 0.25108433734939756, "calib/mu_c": 0.27178294573643413, "calib/mu_w": 0.22883333333333333, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.031566265060240954, "calib/std_conf": 0.23890317667827385, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2630.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 561.2734375, "completions/mean_terminated_length": 561.2734375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.10986666666666667, "grad_norm": 0.004805284086614847, "learning_rate": 2.6944444444444444e-06, "loss": 0.091, "num_tokens": 21192448.0, "reward": 1.0435025691986084, "reward_std": 0.1977090835571289, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6221612691879272, "rewards/format_reward_step": 0.9609375, "step": 103 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3976264260709286, "aux_distill/mean_u": 0.22159318477906012, "aux_distill/n_active_tok": 488.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48812297734627846, "calib/avg_num_step_conf": 7.6328125, "calib/ece": 0.3151778656126482, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05138339920948617, "calib/gap": -0.05741618122977346, "calib/mean_conf": 0.2656916996047431, "calib/mu_c": 0.23165048543689323, "calib/mu_w": 0.2890666666666667, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08687747035573123, "calib/std_conf": 0.2508531210690347, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2328.0, "completions/max_terminated_length": 2328.0, "completions/mean_length": 501.20703125, "completions/mean_terminated_length": 505.1535339355469, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.11093333333333333, "grad_norm": 0.005391786806285381, "learning_rate": 2.666666666666667e-06, "loss": 0.0078, "num_tokens": 21427437.0, "reward": 1.0155071020126343, "reward_std": 0.16230294108390808, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.640389084815979, "rewards/format_reward_step": 0.98828125, "step": 104 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41611582646146417, "aux_distill/mean_u": 0.23099319336662405, "aux_distill/n_active_tok": 480.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5330399285623165, "calib/avg_num_step_conf": 7.41796875, "calib/ece": 0.30884462151394415, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.03201811455542802, "calib/mean_conf": 0.27059760956175305, "calib/mu_c": 0.28552238805970154, "calib/mu_w": 0.2535042735042735, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.022788844621513947, "calib/std_conf": 0.20847396924042944, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2963.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 524.15625, "completions/mean_terminated_length": 524.15625, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.112, "grad_norm": 0.005053746979683638, "learning_rate": 2.6388888888888893e-06, "loss": 0.0846, "num_tokens": 21667381.0, "reward": 1.0659711360931396, "reward_std": 0.19321054220199585, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6358484029769897, "rewards/format_reward_step": 0.97265625, "step": 105 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4285811148583889, "aux_distill/mean_u": 0.2643595084666108, "aux_distill/n_active_tok": 457.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49419411838949895, "calib/avg_num_step_conf": 7.140625, "calib/ece": 0.2772727272727272, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": -0.022519878833775098, "calib/mean_conf": 0.2731620553359684, "calib/mu_c": 0.2607894736842105, "calib/mu_w": 0.2833093525179856, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.04992094861660079, "calib/std_conf": 0.22256739757956648, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2932.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 468.84375, "completions/mean_terminated_length": 470.682373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.11306666666666666, "grad_norm": 0.00532987155020237, "learning_rate": 2.6111111111111113e-06, "loss": 0.0613, "num_tokens": 21891989.0, "reward": 1.0404398441314697, "reward_std": 0.16531819105148315, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6511921882629395, "rewards/format_reward_step": 0.984375, "step": 106 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4220479386858642, "aux_distill/mean_u": 0.2698333611404751, "aux_distill/n_active_tok": 492.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4615858163014036, "calib/avg_num_step_conf": 7.6484375, "calib/ece": 0.29654901960784313, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": -0.024373307067224903, "calib/mean_conf": 0.2836078431372549, "calib/mu_c": 0.2717557251908397, "calib/mu_w": 0.2961290322580646, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.033215686274509805, "calib/std_conf": 0.2114087276688539, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1274.0, "completions/max_terminated_length": 1274.0, "completions/mean_length": 467.12109375, "completions/mean_terminated_length": 468.9529724121094, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.11413333333333334, "grad_norm": 0.005111105740070343, "learning_rate": 2.5833333333333337e-06, "loss": 0.0419, "num_tokens": 22116188.0, "reward": 1.0728371143341064, "reward_std": 0.14746427536010742, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6378617286682129, "rewards/format_reward_step": 0.99609375, "step": 107 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40239980118349195, "aux_distill/mean_u": 0.257174486721168, "aux_distill/n_active_tok": 518.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.45993031358885017, "calib/avg_num_step_conf": 8.09765625, "calib/ece": 0.4353721568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.027450980392156862, "calib/gap": -0.037388528544626154, "calib/mean_conf": 0.27364745098039217, "calib/mu_c": 0.2603048780487805, "calib/mu_w": 0.29769340659340665, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.03294117647058824, "calib/std_conf": 0.2238614374995945, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2637.0, "completions/max_terminated_length": 2637.0, "completions/mean_length": 518.359375, "completions/mean_terminated_length": 518.359375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.1152, "grad_norm": 0.004731772933155298, "learning_rate": 2.5555555555555557e-06, "loss": 0.0327, "num_tokens": 22352120.0, "reward": 1.093104362487793, "reward_std": 0.16300244629383087, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.5612086057662964, "rewards/format_reward_step": 0.984375, "step": 108 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4148107245564461, "aux_distill/mean_u": 0.24032306112382526, "aux_distill/n_active_tok": 531.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5454946439823566, "calib/avg_num_step_conf": 8.375, "calib/ece": 0.23936758893280635, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.019762845849802372, "calib/gap": 0.020057971014492693, "calib/mean_conf": 0.2885375494071146, "calib/mu_c": 0.2994782608695652, "calib/mu_w": 0.2794202898550725, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.036679841897233195, "calib/std_conf": 0.2033898386243882, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2597.0, "completions/max_terminated_length": 2597.0, "completions/mean_length": 519.44140625, "completions/mean_terminated_length": 521.4784545898438, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.11626666666666667, "grad_norm": 0.00585621502250433, "learning_rate": 2.5277777777777778e-06, "loss": 0.0727, "num_tokens": 22589697.0, "reward": 1.0482683181762695, "reward_std": 0.1747893989086151, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6746617555618286, "rewards/format_reward_step": 0.97265625, "step": 109 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4172109765931964, "aux_distill/mean_u": 0.22872385472207696, "aux_distill/n_active_tok": 423.8125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5273184759166067, "calib/avg_num_step_conf": 6.62890625, "calib/ece": 0.22584, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": 0.006161688778511143, "calib/mean_conf": 0.30984, "calib/mu_c": 0.31336448598130834, "calib/mu_w": 0.3072027972027972, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.05384, "calib/std_conf": 0.23109646124508268, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2802.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 470.04296875, "completions/mean_terminated_length": 471.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.11733333333333333, "grad_norm": 0.004899916704744101, "learning_rate": 2.5e-06, "loss": 0.0564, "num_tokens": 22814948.0, "reward": 1.0319619178771973, "reward_std": 0.18836748600006104, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6732988357543945, "rewards/format_reward_step": 0.97265625, "step": 110 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41732750553637743, "aux_distill/mean_u": 0.2677599661508874, "aux_distill/n_active_tok": 472.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5099623572170301, "calib/avg_num_step_conf": 7.3984375, "calib/ece": 0.2764541832669323, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.04780876494023904, "calib/gap": -0.016223390446521324, "calib/mean_conf": 0.30968127490039843, "calib/mu_c": 0.30037383177570093, "calib/mu_w": 0.31659722222222225, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.07992031872509961, "calib/std_conf": 0.22857408766908183, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2010.0, "completions/max_terminated_length": 2010.0, "completions/mean_length": 482.55078125, "completions/mean_terminated_length": 484.44317626953125, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.1184, "grad_norm": 0.005739626474678516, "learning_rate": 2.4722222222222226e-06, "loss": 0.0478, "num_tokens": 23045889.0, "reward": 1.0353412628173828, "reward_std": 0.1884884238243103, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.6683386564254761, "rewards/format_reward_step": 0.98046875, "step": 111 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3907640604302287, "aux_distill/mean_u": 0.27975034768577645, "aux_distill/n_active_tok": 493.25, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5553652230122819, "calib/avg_num_step_conf": 7.75390625, "calib/ece": 0.235140562248996, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.04242921784098258, "calib/mean_conf": 0.27473895582329316, "calib/mu_c": 0.29689075630252104, "calib/mu_w": 0.25446153846153846, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.015983935742971887, "calib/std_conf": 0.19097307398810948, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2538.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 539.71875, "completions/mean_terminated_length": 543.968505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.11946666666666667, "grad_norm": 0.005129808560013771, "learning_rate": 2.4444444444444447e-06, "loss": 0.0865, "num_tokens": 23291977.0, "reward": 1.0484795570373535, "reward_std": 0.16111013293266296, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6672718524932861, "rewards/format_reward_step": 0.96484375, "step": 112 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4189460719935596, "aux_distill/mean_u": 0.27368923599010614, "aux_distill/n_active_tok": 495.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5023419203747073, "calib/avg_num_step_conf": 7.71484375, "calib/ece": 0.2673333333333334, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": -0.0001429804018241998, "calib/mean_conf": 0.3007450980392157, "calib/mu_c": 0.3006766917293233, "calib/mu_w": 0.3008196721311475, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.023254901960784318, "calib/std_conf": 0.18694193741952836, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2569.0, "completions/max_terminated_length": 2569.0, "completions/mean_length": 469.48046875, "completions/mean_terminated_length": 469.48046875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.12053333333333334, "grad_norm": 0.00572775537148118, "learning_rate": 2.4166666666666667e-06, "loss": 0.0455, "num_tokens": 23517364.0, "reward": 1.089852213859558, "reward_std": 0.14501136541366577, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6640793085098267, "rewards/format_reward_step": 0.99609375, "step": 113 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4068376966752112, "aux_distill/mean_u": 0.24630880528646237, "aux_distill/n_active_tok": 466.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5650943396226416, "calib/avg_num_step_conf": 7.3515625, "calib/ece": 0.3013671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.01953125, "calib/gap": 0.03843144654088049, "calib/mean_conf": 0.31082031250000003, "calib/mu_c": 0.3267333333333333, "calib/mu_w": 0.28830188679245283, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013125000000000001, "calib/std_conf": 0.19955544082385313, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1676.0, "completions/max_terminated_length": 1676.0, "completions/mean_length": 435.13671875, "completions/mean_terminated_length": 436.8431701660156, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.1216, "grad_norm": 0.005413229577243328, "learning_rate": 2.388888888888889e-06, "loss": 0.0163, "num_tokens": 23733783.0, "reward": 1.1232295036315918, "reward_std": 0.13653072714805603, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6605215072631836, "rewards/format_reward_step": 1.0, "step": 114 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3964782766997814, "aux_distill/mean_u": 0.20929134778443492, "aux_distill/n_active_tok": 471.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.477160797833046, "calib/avg_num_step_conf": 7.4375, "calib/ece": 0.27051176470588234, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": -0.03655121891159807, "calib/mean_conf": 0.3403901960784314, "calib/mu_c": 0.3216129032258065, "calib/mu_w": 0.35816412213740456, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06231372549019608, "calib/std_conf": 0.23257696533802288, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1745.0, "completions/max_terminated_length": 1745.0, "completions/mean_length": 431.984375, "completions/mean_terminated_length": 433.678466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.12266666666666666, "grad_norm": 0.005717757623642683, "learning_rate": 2.361111111111111e-06, "loss": -0.0018, "num_tokens": 23949635.0, "reward": 1.0635097026824951, "reward_std": 0.17919078469276428, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6504567861557007, "rewards/format_reward_step": 0.9921875, "step": 115 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39956394489854574, "aux_distill/mean_u": 0.23843049430854713, "aux_distill/n_active_tok": 543.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5721250000000001, "calib/avg_num_step_conf": 8.51171875, "calib/ece": 0.255098814229249, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.044694375000000064, "calib/mean_conf": 0.2843478260869565, "calib/mu_c": 0.30696, "calib/mu_w": 0.26226562499999995, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02268774703557313, "calib/std_conf": 0.19661580874948692, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1718.0, "completions/max_terminated_length": 1718.0, "completions/mean_length": 515.09765625, "completions/mean_terminated_length": 517.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.12373333333333333, "grad_norm": 0.005036481190472841, "learning_rate": 2.3333333333333336e-06, "loss": 0.0429, "num_tokens": 24186020.0, "reward": 1.0783438682556152, "reward_std": 0.16028830409049988, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6801249980926514, "rewards/format_reward_step": 0.984375, "step": 116 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3888081591576338, "aux_distill/mean_u": 0.20686839885684585, "aux_distill/n_active_tok": 542.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4939897698209719, "calib/avg_num_step_conf": 8.4375, "calib/ece": 0.20772908366533865, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": -0.0021815856777493425, "calib/mean_conf": 0.33752988047808763, "calib/mu_c": 0.3363478260869565, "calib/mu_w": 0.33852941176470586, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04354581673306772, "calib/std_conf": 0.2156641601742187, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2425.0, "completions/max_terminated_length": 2425.0, "completions/mean_length": 501.34375, "completions/mean_terminated_length": 501.34375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.1248, "grad_norm": 0.0053986916318535805, "learning_rate": 2.305555555555556e-06, "loss": 0.0998, "num_tokens": 24420964.0, "reward": 1.052910566329956, "reward_std": 0.18577823042869568, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6761335730552673, "rewards/format_reward_step": 0.98046875, "step": 117 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3707348401658237, "aux_distill/mean_u": 0.19880193935700557, "aux_distill/n_active_tok": 502.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.57578125, "calib/avg_num_step_conf": 7.84765625, "calib/ece": 0.21328063241106723, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.05713500000000005, "calib/mean_conf": 0.3311462450592885, "calib/mu_c": 0.359375, "calib/mu_w": 0.30223999999999995, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.019249011857707513, "calib/std_conf": 0.20804726953127312, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2293.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 466.609375, "completions/mean_terminated_length": 468.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 61.0, "epoch": 0.12586666666666665, "grad_norm": 0.005608038976788521, "learning_rate": 2.277777777777778e-06, "loss": -0.0022, "num_tokens": 24644424.0, "reward": 1.0923941135406494, "reward_std": 0.15614479780197144, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6965069770812988, "rewards/format_reward_step": 0.98828125, "step": 118 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42202150681987405, "aux_distill/mean_u": 0.285246961711442, "aux_distill/n_active_tok": 570.25, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5502906976744186, "calib/avg_num_step_conf": 9.1953125, "calib/ece": 0.23879518072289155, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0321285140562249, "calib/gap": 0.02000968992248059, "calib/mean_conf": 0.3208835341365462, "calib/mu_c": 0.33125, "calib/mu_w": 0.3112403100775194, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03887550200803214, "calib/std_conf": 0.20838454589009983, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 521.65234375, "completions/mean_terminated_length": 527.8379516601562, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.12693333333333334, "grad_norm": 0.005744331516325474, "learning_rate": 2.25e-06, "loss": 0.0611, "num_tokens": 24883031.0, "reward": 1.0567359924316406, "reward_std": 0.2007881999015808, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6720656156539917, "rewards/format_reward_step": 0.97265625, "step": 119 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40057709300890565, "aux_distill/mean_u": 0.23066852190770373, "aux_distill/n_active_tok": 513.03125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5443661077235772, "calib/avg_num_step_conf": 8.1015625, "calib/ece": 0.21824701195219126, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": 0.02910251524390245, "calib/mean_conf": 0.32581673306772907, "calib/mu_c": 0.340078125, "calib/mu_w": 0.31097560975609756, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.017051792828685252, "calib/std_conf": 0.18867864678303412, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2296.0, "completions/max_terminated_length": 2296.0, "completions/mean_length": 468.4609375, "completions/mean_terminated_length": 470.2980651855469, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.128, "grad_norm": 0.006349902134388685, "learning_rate": 2.222222222222222e-06, "loss": 0.0562, "num_tokens": 25109645.0, "reward": 1.0797877311706543, "reward_std": 0.17021773755550385, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6791066527366638, "rewards/format_reward_step": 0.9765625, "step": 120 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39616987807676196, "aux_distill/mean_u": 0.23927794179594877, "aux_distill/n_active_tok": 600.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5476857452091424, "calib/avg_num_step_conf": 9.37890625, "calib/ece": 0.1884126984126984, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03968253968253968, "calib/gap": 0.021945629337238093, "calib/mean_conf": 0.33214285714285713, "calib/mu_c": 0.3442477876106194, "calib/mu_w": 0.3223021582733813, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03607142857142857, "calib/std_conf": 0.2082070365478247, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2723.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 545.3125, "completions/mean_terminated_length": 545.3125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.12906666666666666, "grad_norm": 0.0052563464269042015, "learning_rate": 2.1944444444444445e-06, "loss": 0.0883, "num_tokens": 25354301.0, "reward": 1.0606942176818848, "reward_std": 0.19225087761878967, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.69560706615448, "rewards/format_reward_step": 0.984375, "step": 121 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4316580416634679, "aux_distill/mean_u": 0.27906441364180823, "aux_distill/n_active_tok": 497.375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5692455242966752, "calib/avg_num_step_conf": 7.82421875, "calib/ece": 0.20302788844621517, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.027888446215139442, "calib/gap": 0.049783248081841425, "calib/mean_conf": 0.3712350597609562, "calib/mu_c": 0.3940441176470588, "calib/mu_w": 0.3442608695652174, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.016215139442231078, "calib/std_conf": 0.2060703969325257, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2694.0, "completions/max_terminated_length": 2694.0, "completions/mean_length": 450.72265625, "completions/mean_terminated_length": 454.2716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 151.0, "epoch": 0.13013333333333332, "grad_norm": 0.00618218956515193, "learning_rate": 2.166666666666667e-06, "loss": 0.0498, "num_tokens": 25577030.0, "reward": 1.097528338432312, "reward_std": 0.1832428276538849, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.687244176864624, "rewards/format_reward_step": 0.9765625, "step": 122 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40500340377911925, "aux_distill/mean_u": 0.2747171806388661, "aux_distill/n_active_tok": 577.6875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5615291571173924, "calib/avg_num_step_conf": 9.09765625, "calib/ece": 0.16545816733067728, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0398406374501992, "calib/gap": 0.052460529666412015, "calib/mean_conf": 0.36585657370517927, "calib/mu_c": 0.3934453781512605, "calib/mu_w": 0.3409848484848485, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.028605577689243038, "calib/std_conf": 0.2113953909180801, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 542.421875, "completions/mean_terminated_length": 546.6929321289062, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.1312, "grad_norm": 0.0051978956907987595, "learning_rate": 2.138888888888889e-06, "loss": 0.0095, "num_tokens": 25821178.0, "reward": 1.0758334398269653, "reward_std": 0.17953169345855713, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7063542604446411, "rewards/format_reward_step": 0.98046875, "step": 123 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39497670345008373, "aux_distill/mean_u": 0.20548810591655767, "aux_distill/n_active_tok": 525.03125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5154095701540956, "calib/avg_num_step_conf": 8.203125, "calib/ece": 0.232755905511811, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": -0.005556179424792529, "calib/mean_conf": 0.3860629921259843, "calib/mu_c": 0.38350364963503647, "calib/mu_w": 0.389059829059829, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0397244094488189, "calib/std_conf": 0.20238988443924427, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2281.0, "completions/max_terminated_length": 2281.0, "completions/mean_length": 486.09765625, "completions/mean_terminated_length": 486.09765625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 0.13226666666666667, "grad_norm": 0.0059423367492854595, "learning_rate": 2.1111111111111114e-06, "loss": 0.0639, "num_tokens": 26052435.0, "reward": 1.0993671417236328, "reward_std": 0.16392850875854492, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6752969026565552, "rewards/format_reward_step": 0.98828125, "step": 124 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36721099307760596, "aux_distill/mean_u": 0.194680674087944, "aux_distill/n_active_tok": 510.96875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.572389632936508, "calib/avg_num_step_conf": 7.96875, "calib/ece": 0.20338937007874017, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": 0.04803913690476186, "calib/mean_conf": 0.39015393700787404, "calib/mu_c": 0.41398437499999996, "calib/mu_w": 0.3659452380952381, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.044803149606299233, "calib/std_conf": 0.22311851579552777, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2315.0, "completions/max_terminated_length": 2315.0, "completions/mean_length": 494.0546875, "completions/mean_terminated_length": 494.0546875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.13333333333333333, "grad_norm": 0.005903557408601046, "learning_rate": 2.0833333333333334e-06, "loss": 0.086, "num_tokens": 26283721.0, "reward": 1.0950615406036377, "reward_std": 0.19785240292549133, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.7018418312072754, "rewards/format_reward_step": 0.98828125, "step": 125 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3900165860541165, "aux_distill/mean_u": 0.24280415522821283, "aux_distill/n_active_tok": 535.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5641883116883115, "calib/avg_num_step_conf": 8.5078125, "calib/ece": 0.1463548, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.06, "calib/gap": 0.05018551948051958, "calib/mean_conf": 0.41180520000000004, "calib/mu_c": 0.43990909090909097, "calib/mu_w": 0.3897235714285714, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.059080000000000014, "calib/std_conf": 0.2240988442918883, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 495.5390625, "completions/mean_terminated_length": 499.4409484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.1344, "grad_norm": 0.005371227860450745, "learning_rate": 2.0555555555555555e-06, "loss": 0.0593, "num_tokens": 26516043.0, "reward": 1.0582598447799683, "reward_std": 0.18028494715690613, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7102696895599365, "rewards/format_reward_step": 0.9765625, "step": 126 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3885276550427079, "aux_distill/mean_u": 0.234251354106569, "aux_distill/n_active_tok": 564.40625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5701081612586036, "calib/avg_num_step_conf": 8.8046875, "calib/ece": 0.11475806451612904, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.05241935483870968, "calib/gap": 0.054674532940019505, "calib/mean_conf": 0.4484677419354839, "calib/mu_c": 0.47823008849557513, "calib/mu_w": 0.42355555555555563, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.053790322580645186, "calib/std_conf": 0.21490074361104142, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 516.74609375, "completions/mean_terminated_length": 516.74609375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.13546666666666668, "grad_norm": 0.006517878267914057, "learning_rate": 2.027777777777778e-06, "loss": 0.1305, "num_tokens": 26752002.0, "reward": 1.060055136680603, "reward_std": 0.21711894869804382, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.7099539041519165, "rewards/format_reward_step": 0.96875, "step": 127 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4126989454962313, "aux_distill/mean_u": 0.2660821506582588, "aux_distill/n_active_tok": 519.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.518909688843916, "calib/avg_num_step_conf": 8.1484375, "calib/ece": 0.18773809523809523, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.010289653427776435, "calib/mean_conf": 0.4425793650793651, "calib/mu_c": 0.44805084745762713, "calib/mu_w": 0.4377611940298507, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.08103174603174601, "calib/std_conf": 0.22334420205161598, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2783.0, "completions/max_terminated_length": 2783.0, "completions/mean_length": 516.5234375, "completions/mean_terminated_length": 520.590576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.13653333333333334, "grad_norm": 0.0053933002054691315, "learning_rate": 2.0000000000000003e-06, "loss": 0.052, "num_tokens": 26990896.0, "reward": 1.0699388980865479, "reward_std": 0.21272283792495728, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6945652365684509, "rewards/format_reward_step": 0.984375, "step": 128 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39896715339273214, "aux_distill/mean_u": 0.2534112287874238, "aux_distill/n_active_tok": 542.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5908012820512821, "calib/avg_num_step_conf": 8.484375, "calib/ece": 0.17408740157480312, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.03937007874015748, "calib/gap": 0.05774148717948724, "calib/mean_conf": 0.46150314960629923, "calib/mu_c": 0.48514533333333343, "calib/mu_w": 0.4274038461538462, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02251968503937008, "calib/std_conf": 0.20543797402423522, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 464.01171875, "completions/mean_terminated_length": 464.01171875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 0.1376, "grad_norm": 0.0062581077218055725, "learning_rate": 1.9722222222222224e-06, "loss": 0.0497, "num_tokens": 27212067.0, "reward": 1.1498541831970215, "reward_std": 0.1686449944972992, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7215834259986877, "rewards/format_reward_step": 0.9921875, "step": 129 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37909140018746257, "aux_distill/mean_u": 0.2216074330409185, "aux_distill/n_active_tok": 590.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5513638638638639, "calib/avg_num_step_conf": 9.234375, "calib/ece": 0.17133333333333334, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": 0.04842342342342343, "calib/mean_conf": 0.453921568627451, "calib/mu_c": 0.47500000000000003, "calib/mu_w": 0.4265765765765766, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030274509803921566, "calib/std_conf": 0.21585469360308243, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1955.0, "completions/max_terminated_length": 1955.0, "completions/mean_length": 490.41015625, "completions/mean_terminated_length": 492.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.13866666666666666, "grad_norm": 0.005617792718112469, "learning_rate": 1.944444444444445e-06, "loss": -0.0022, "num_tokens": 27442900.0, "reward": 1.137455701828003, "reward_std": 0.16030901670455933, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.7163175344467163, "rewards/format_reward_step": 0.99609375, "step": 130 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4019861314445734, "aux_distill/mean_u": 0.2449434224736697, "aux_distill/n_active_tok": 500.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5476300197498354, "calib/avg_num_step_conf": 8.234375, "calib/ece": 0.1700395256916996, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05928853754940711, "calib/gap": 0.04127518104015798, "calib/mean_conf": 0.4837944664031621, "calib/mu_c": 0.5090816326530612, "calib/mu_w": 0.4678064516129032, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1332411067193676, "calib/std_conf": 0.21870222064088155, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1762.0, "completions/max_terminated_length": 1762.0, "completions/mean_length": 432.30078125, "completions/mean_terminated_length": 435.7047119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.13973333333333332, "grad_norm": 0.0073953066021203995, "learning_rate": 1.916666666666667e-06, "loss": -0.0345, "num_tokens": 27659777.0, "reward": 1.0458250045776367, "reward_std": 0.1774011254310608, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.7166500091552734, "rewards/format_reward_step": 0.98828125, "step": 131 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3832178018055856, "aux_distill/mean_u": 0.29191975596897407, "aux_distill/n_active_tok": 606.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5628139043087497, "calib/avg_num_step_conf": 9.4765625, "calib/ece": 0.22648221343873517, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.0352868094105207, "calib/mean_conf": 0.4308300395256917, "calib/mu_c": 0.4443589743589743, "calib/mu_w": 0.4090721649484536, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020355731225296443, "calib/std_conf": 0.19875825676238895, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2408.0, "completions/max_terminated_length": 2408.0, "completions/mean_length": 515.671875, "completions/mean_terminated_length": 515.671875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.1408, "grad_norm": 0.005856058094650507, "learning_rate": 1.888888888888889e-06, "loss": 0.0474, "num_tokens": 27897381.0, "reward": 1.144197702407837, "reward_std": 0.1931305229663849, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6946452856063843, "rewards/format_reward_step": 0.984375, "step": 132 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4176169987767935, "aux_distill/mean_u": 0.2787832522007128, "aux_distill/n_active_tok": 694.03125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5484593644447509, "calib/avg_num_step_conf": 10.84375, "calib/ece": 0.15043650793650792, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.027777777777777776, "calib/gap": 0.026141173226718217, "calib/mean_conf": 0.4370238095238095, "calib/mu_c": 0.45393258426966293, "calib/mu_w": 0.4277914110429447, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11714285714285715, "calib/std_conf": 0.20604618700460903, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2962.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 605.890625, "completions/mean_terminated_length": 605.890625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.14186666666666667, "grad_norm": 0.005014988128095865, "learning_rate": 1.8611111111111113e-06, "loss": 0.0875, "num_tokens": 28158833.0, "reward": 1.0234687328338623, "reward_std": 0.20991873741149902, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.7188124656677246, "rewards/format_reward_step": 0.98046875, "step": 133 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3869697102345526, "aux_distill/mean_u": 0.24196695079998468, "aux_distill/n_active_tok": 683.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5955873015873017, "calib/avg_num_step_conf": 10.65625, "calib/ece": 0.17980000000000002, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.043824701195219126, "calib/gap": 0.061628901587301665, "calib/mean_conf": 0.42410438247011956, "calib/mu_c": 0.45504160000000005, "calib/mu_w": 0.3934126984126984, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05294820717131475, "calib/std_conf": 0.22781403355626803, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2479.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 592.046875, "completions/mean_terminated_length": 594.36865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.14293333333333333, "grad_norm": 0.005612542387098074, "learning_rate": 1.8333333333333333e-06, "loss": 0.043, "num_tokens": 28419349.0, "reward": 1.089038372039795, "reward_std": 0.20408809185028076, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.709326982498169, "rewards/format_reward_step": 0.98046875, "step": 134 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4061538176611066, "aux_distill/mean_u": 0.22896098657794023, "aux_distill/n_active_tok": 645.78125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5387179487179486, "calib/avg_num_step_conf": 10.08984375, "calib/ece": 0.19088, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.032, "calib/gap": 0.036064102564102585, "calib/mean_conf": 0.46592, "calib/mu_c": 0.48323076923076924, "calib/mu_w": 0.44716666666666666, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0684, "calib/std_conf": 0.21351195189028646, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2642.0, "completions/max_terminated_length": 2642.0, "completions/mean_length": 578.28125, "completions/mean_terminated_length": 578.28125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.144, "grad_norm": 0.0058111692778766155, "learning_rate": 1.8055555555555557e-06, "loss": 0.1116, "num_tokens": 28673269.0, "reward": 1.0936968326568604, "reward_std": 0.19715605676174164, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.7030187845230103, "rewards/format_reward_step": 0.9765625, "step": 135 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37998731853440404, "aux_distill/mean_u": 0.22645032493399606, "aux_distill/n_active_tok": 651.21875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.630528691166989, "calib/avg_num_step_conf": 10.1640625, "calib/ece": 0.1035856573705179, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.08312508059316565, "calib/mean_conf": 0.45848605577689244, "calib/mu_c": 0.5051818181818181, "calib/mu_w": 0.4220567375886524, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06191235059760956, "calib/std_conf": 0.20005004056450834, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 541.5, "completions/mean_terminated_length": 545.7637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.0, "epoch": 0.14506666666666668, "grad_norm": 0.005560385063290596, "learning_rate": 1.777777777777778e-06, "loss": 0.067, "num_tokens": 28920381.0, "reward": 1.074867844581604, "reward_std": 0.1932879090309143, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.7395796775817871, "rewards/format_reward_step": 0.98046875, "step": 136 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39566820114851, "aux_distill/mean_u": 0.25699592562100493, "aux_distill/n_active_tok": 660.40625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5430187459599225, "calib/avg_num_step_conf": 10.34375, "calib/ece": 0.15425702811244976, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.028112449799196786, "calib/gap": 0.009484809308338615, "calib/mean_conf": 0.4516867469879518, "calib/mu_c": 0.45663865546218474, "calib/mu_w": 0.4471538461538461, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0640160642570281, "calib/std_conf": 0.18965506171679003, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2625.0, "completions/max_terminated_length": 2625.0, "completions/mean_length": 533.89453125, "completions/mean_terminated_length": 535.98828125, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.14613333333333334, "grad_norm": 0.00525377131998539, "learning_rate": 1.75e-06, "loss": 0.0355, "num_tokens": 29164042.0, "reward": 1.0650525093078613, "reward_std": 0.20569221675395966, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6965113282203674, "rewards/format_reward_step": 0.96875, "step": 137 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3761101681739092, "aux_distill/mean_u": 0.22736046414342426, "aux_distill/n_active_tok": 639.875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5541148107121914, "calib/avg_num_step_conf": 9.9921875, "calib/ece": 0.16638888888888892, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": 0.04157881019091664, "calib/mean_conf": 0.4409920634920635, "calib/mu_c": 0.457986577181208, "calib/mu_w": 0.41640776699029136, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008055555555555552, "calib/std_conf": 0.19486108181690132, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2415.0, "completions/max_terminated_length": 2415.0, "completions/mean_length": 558.171875, "completions/mean_terminated_length": 558.171875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.1472, "grad_norm": 0.005661411210894585, "learning_rate": 1.7222222222222224e-06, "loss": 0.0969, "num_tokens": 29411270.0, "reward": 1.1295913457870483, "reward_std": 0.20206280052661896, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7005890607833862, "rewards/format_reward_step": 0.9765625, "step": 138 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37806188873946667, "aux_distill/mean_u": 0.23127254913429085, "aux_distill/n_active_tok": 567.0625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5583129741545583, "calib/avg_num_step_conf": 8.84375, "calib/ece": 0.1582352941176471, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": 0.044456731387424564, "calib/mean_conf": 0.47031372549019607, "calib/mu_c": 0.4879220779220779, "calib/mu_w": 0.44346534653465336, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01231372549019608, "calib/std_conf": 0.18664819483493628, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2649.0, "completions/max_terminated_length": 2649.0, "completions/mean_length": 472.3671875, "completions/mean_terminated_length": 472.3671875, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.14826666666666666, "grad_norm": 0.006207121070474386, "learning_rate": 1.6944444444444446e-06, "loss": 0.0564, "num_tokens": 29635292.0, "reward": 1.162093162536621, "reward_std": 0.17753392457962036, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7265300750732422, "rewards/format_reward_step": 0.99609375, "step": 139 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38487443793565035, "aux_distill/mean_u": 0.21633979470535, "aux_distill/n_active_tok": 583.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.503512645523886, "calib/avg_num_step_conf": 9.12109375, "calib/ece": 0.23739130434782607, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": -0.0036437842901111406, "calib/mean_conf": 0.4660079051383399, "calib/mu_c": 0.46465408805031444, "calib/mu_w": 0.4682978723404256, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03747035573122529, "calib/std_conf": 0.19472761947733852, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2532.0, "completions/max_terminated_length": 2532.0, "completions/mean_length": 525.58984375, "completions/mean_terminated_length": 527.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.14933333333333335, "grad_norm": 0.005536575336009264, "learning_rate": 1.6666666666666667e-06, "loss": 0.0398, "num_tokens": 29874859.0, "reward": 1.1472833156585693, "reward_std": 0.18610472977161407, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.689098060131073, "rewards/format_reward_step": 0.984375, "step": 140 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36641107965260744, "aux_distill/mean_u": 0.23129930774601012, "aux_distill/n_active_tok": 599.71875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.606989247311828, "calib/avg_num_step_conf": 9.3671875, "calib/ece": 0.1953359683794466, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.07142137096774198, "calib/mean_conf": 0.45968379446640323, "calib/mu_c": 0.4859375, "calib/mu_w": 0.41451612903225804, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011304347826086952, "calib/std_conf": 0.1996239023403811, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2476.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 574.19140625, "completions/mean_terminated_length": 576.4431762695312, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.1504, "grad_norm": 0.005775410681962967, "learning_rate": 1.638888888888889e-06, "loss": 0.0661, "num_tokens": 30128948.0, "reward": 1.167884349822998, "reward_std": 0.19001102447509766, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7224875092506409, "rewards/format_reward_step": 0.98828125, "step": 141 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38335499819368124, "aux_distill/mean_u": 0.23487243225391793, "aux_distill/n_active_tok": 622.34375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5993613824192336, "calib/avg_num_step_conf": 9.7265625, "calib/ece": 0.10956521739130433, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.06858126721763086, "calib/mean_conf": 0.4528063241106719, "calib/mu_c": 0.48560606060606065, "calib/mu_w": 0.4170247933884298, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.020316205533596848, "calib/std_conf": 0.19820446493578275, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2296.0, "completions/max_terminated_length": 2296.0, "completions/mean_length": 571.28125, "completions/mean_terminated_length": 573.5216064453125, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.15146666666666667, "grad_norm": 0.005249022971838713, "learning_rate": 1.6111111111111113e-06, "loss": 0.0134, "num_tokens": 30380356.0, "reward": 1.109088659286499, "reward_std": 0.1934710443019867, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.7220836281776428, "rewards/format_reward_step": 0.9765625, "step": 142 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37024654168635607, "aux_distill/mean_u": 0.2144742621975383, "aux_distill/n_active_tok": 635.3125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6517263427109975, "calib/avg_num_step_conf": 9.90625, "calib/ece": 0.11812749003984062, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.09728069053708438, "calib/mean_conf": 0.470796812749004, "calib/mu_c": 0.5153676470588235, "calib/mu_w": 0.41808695652173916, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02354581673306773, "calib/std_conf": 0.19683391018067173, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2570.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 594.14453125, "completions/mean_terminated_length": 594.14453125, "completions/min_length": 200.0, "completions/min_terminated_length": 200.0, "epoch": 0.15253333333333333, "grad_norm": 0.006905294954776764, "learning_rate": 1.5833333333333333e-06, "loss": 0.1369, "num_tokens": 30639793.0, "reward": 1.1266040802001953, "reward_std": 0.19484950602054596, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.7414894104003906, "rewards/format_reward_step": 0.98046875, "step": 143 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3681063880212605, "aux_distill/mean_u": 0.18628796554254137, "aux_distill/n_active_tok": 611.34375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5160638644509612, "calib/avg_num_step_conf": 9.55078125, "calib/ece": 0.17539370078740157, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.04330708661417323, "calib/gap": 0.018401433691756397, "calib/mean_conf": 0.49011811023622043, "calib/mu_c": 0.49729032258064526, "calib/mu_w": 0.47888888888888886, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02763779527559055, "calib/std_conf": 0.2006396901477136, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2564.0, "completions/max_terminated_length": 2564.0, "completions/mean_length": 563.98828125, "completions/mean_terminated_length": 563.98828125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.1536, "grad_norm": 0.005503041669726372, "learning_rate": 1.5555555555555558e-06, "loss": 0.0812, "num_tokens": 30888302.0, "reward": 1.154140830039978, "reward_std": 0.18355412781238556, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.710625410079956, "rewards/format_reward_step": 0.9921875, "step": 144 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3513887752778828, "aux_distill/mean_u": 0.21696741285072296, "aux_distill/n_active_tok": 651.15625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5948333333333333, "calib/avg_num_step_conf": 10.171875, "calib/ece": 0.14659999999999995, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.028, "calib/gap": 0.06279999999999986, "calib/mean_conf": 0.48148, "calib/mu_c": 0.5065999999999998, "calib/mu_w": 0.4438, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.014039999999999987, "calib/std_conf": 0.18749669223748988, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2476.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 552.65234375, "completions/mean_terminated_length": 554.8196411132812, "completions/min_length": 0.0, "completions/min_terminated_length": 38.0, "epoch": 0.15466666666666667, "grad_norm": 0.005455151200294495, "learning_rate": 1.527777777777778e-06, "loss": 0.0276, "num_tokens": 31132485.0, "reward": 1.143038034439087, "reward_std": 0.20722763240337372, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7235761880874634, "rewards/format_reward_step": 0.9765625, "step": 145 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4044023398309946, "aux_distill/mean_u": 0.2234341881191083, "aux_distill/n_active_tok": 641.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5734043903057988, "calib/avg_num_step_conf": 10.00390625, "calib/ece": 0.11881422924901189, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.04743083003952569, "calib/gap": 0.04737152645603343, "calib/mean_conf": 0.5039525691699605, "calib/mu_c": 0.5305405405405406, "calib/mu_w": 0.48316901408450713, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.09201581027667986, "calib/std_conf": 0.2019455512148131, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2770.0, "completions/max_terminated_length": 2770.0, "completions/mean_length": 571.01953125, "completions/mean_terminated_length": 571.01953125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.15573333333333333, "grad_norm": 0.005942014046013355, "learning_rate": 1.5e-06, "loss": 0.0738, "num_tokens": 31385882.0, "reward": 1.0697505474090576, "reward_std": 0.21587388217449188, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.7215324640274048, "rewards/format_reward_step": 0.984375, "step": 146 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37331899954006076, "aux_distill/mean_u": 0.2227285418246145, "aux_distill/n_active_tok": 642.90625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6430228810828231, "calib/avg_num_step_conf": 10.21875, "calib/ece": 0.13431230158730162, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05952380952380952, "calib/gap": 0.10045484369964552, "calib/mean_conf": 0.5290210317460317, "calib/mu_c": 0.5868224299065421, "calib/mu_w": 0.4863675862068966, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1193650793650794, "calib/std_conf": 0.21020622318098686, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 612.515625, "completions/mean_terminated_length": 617.3385620117188, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.1568, "grad_norm": 0.004775840323418379, "learning_rate": 1.4722222222222225e-06, "loss": 0.0178, "num_tokens": 31646366.0, "reward": 1.0663350820541382, "reward_std": 0.2053927630186081, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.7342325448989868, "rewards/format_reward_step": 0.98046875, "step": 147 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3720237333327532, "aux_distill/mean_u": 0.23919748396660928, "aux_distill/n_active_tok": 592.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5598808341608739, "calib/avg_num_step_conf": 9.203125, "calib/ece": 0.170748031496063, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.051181102362204724, "calib/gap": 0.03987884806355502, "calib/mean_conf": 0.5388582677165354, "calib/mu_c": 0.5537735849056603, "calib/mu_w": 0.5138947368421053, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.041811023622047236, "calib/std_conf": 0.20432305069623957, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2083.0, "completions/max_terminated_length": 2083.0, "completions/mean_length": 541.83984375, "completions/mean_terminated_length": 541.83984375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.15786666666666666, "grad_norm": 0.0056559499353170395, "learning_rate": 1.4444444444444445e-06, "loss": 0.0557, "num_tokens": 31890189.0, "reward": 1.1733251810073853, "reward_std": 0.2118576318025589, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.7294628620147705, "rewards/format_reward_step": 0.9921875, "step": 148 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37415668834000826, "aux_distill/mean_u": 0.18526994675126796, "aux_distill/n_active_tok": 631.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6356880849987352, "calib/avg_num_step_conf": 9.87109375, "calib/ece": 0.0841269841269841, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.08855805717176818, "calib/mean_conf": 0.5216666666666666, "calib/mu_c": 0.5631343283582089, "calib/mu_w": 0.4745762711864407, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.037023809523809514, "calib/std_conf": 0.19665355081365565, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 592.55078125, "completions/mean_terminated_length": 597.216552734375, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.15893333333333334, "grad_norm": 0.005320643540471792, "learning_rate": 1.4166666666666667e-06, "loss": 0.0261, "num_tokens": 32146338.0, "reward": 1.126164436340332, "reward_std": 0.19407425820827484, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7445164322853088, "rewards/format_reward_step": 0.984375, "step": 149 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4086106764152646, "aux_distill/mean_u": 0.24081592924632741, "aux_distill/n_active_tok": 558.3125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5217777777777778, "calib/avg_num_step_conf": 8.80078125, "calib/ece": 0.16298039215686275, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.058823529411764705, "calib/gap": 0.01598095238095243, "calib/mean_conf": 0.5743529411764706, "calib/mu_c": 0.5809333333333334, "calib/mu_w": 0.564952380952381, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07454901960784314, "calib/std_conf": 0.19403323179385817, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2204.0, "completions/max_terminated_length": 2204.0, "completions/mean_length": 487.75, "completions/mean_terminated_length": 489.66278076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.16, "grad_norm": 0.005738874897360802, "learning_rate": 1.3888888888888892e-06, "loss": 0.044, "num_tokens": 32376162.0, "reward": 1.1534371376037598, "reward_std": 0.18325306475162506, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7248429656028748, "rewards/format_reward_step": 0.99609375, "step": 150 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38912961538881063, "aux_distill/mean_u": 0.23468656230850374, "aux_distill/n_active_tok": 642.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.564843847986956, "calib/avg_num_step_conf": 10.04296875, "calib/ece": 0.17541501976284588, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.07509881422924901, "calib/gap": 0.05584409883356345, "calib/mean_conf": 0.5452964426877469, "calib/mu_c": 0.574873949579832, "calib/mu_w": 0.5190298507462685, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.12517786561264824, "calib/std_conf": 0.23259069179202796, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2558.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 588.3203125, "completions/mean_terminated_length": 588.3203125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.16106666666666666, "grad_norm": 0.0050440216436982155, "learning_rate": 1.3611111111111112e-06, "loss": 0.033, "num_tokens": 32633796.0, "reward": 1.0779383182525635, "reward_std": 0.2225310355424881, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.7066578269004822, "rewards/format_reward_step": 0.984375, "step": 151 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3880243906751275, "aux_distill/mean_u": 0.19693858332008352, "aux_distill/n_active_tok": 622.71875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.572785614436396, "calib/avg_num_step_conf": 9.7265625, "calib/ece": 0.1451442231075697, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.08366533864541832, "calib/gap": 0.04836035074342371, "calib/mean_conf": 0.6034709163346613, "calib/mu_c": 0.6269767441860465, "calib/mu_w": 0.5786163934426228, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11733545816733064, "calib/std_conf": 0.2129893996734671, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2883.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 587.45703125, "completions/mean_terminated_length": 587.45703125, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.16213333333333332, "grad_norm": 0.005218581296503544, "learning_rate": 1.3333333333333334e-06, "loss": 0.072, "num_tokens": 32889577.0, "reward": 1.0956348180770874, "reward_std": 0.2619733214378357, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.7068947553634644, "rewards/format_reward_step": 0.98046875, "step": 152 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4142996594309807, "aux_distill/mean_u": 0.29708617139507415, "aux_distill/n_active_tok": 627.0, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5845899607519108, "calib/avg_num_step_conf": 9.96875, "calib/ece": 0.13467213114754095, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.11885245901639344, "calib/gap": 0.06643117813124011, "calib/mean_conf": 0.6192622950819673, "calib/mu_c": 0.6473049645390071, "calib/mu_w": 0.580873786407767, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.08803278688524588, "calib/std_conf": 0.2170373089963381, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2735.0, "completions/max_terminated_length": 2735.0, "completions/mean_length": 632.4375, "completions/mean_terminated_length": 637.4172973632812, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.1632, "grad_norm": 0.005001327954232693, "learning_rate": 1.3055555555555556e-06, "loss": 0.0707, "num_tokens": 33158801.0, "reward": 1.104444980621338, "reward_std": 0.21987763047218323, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.7049835920333862, "rewards/format_reward_step": 0.953125, "step": 153 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3739353087730706, "aux_distill/mean_u": 0.22378105635695517, "aux_distill/n_active_tok": 589.40625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6106679960119641, "calib/avg_num_step_conf": 9.3203125, "calib/ece": 0.19649606299212602, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.1062992125984252, "calib/gap": 0.07342597208374879, "calib/mean_conf": 0.6371259842519686, "calib/mu_c": 0.6764406779661017, "calib/mu_w": 0.603014705882353, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18452755905511814, "calib/std_conf": 0.20850236472633155, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1530.0, "completions/max_terminated_length": 1530.0, "completions/mean_length": 540.6484375, "completions/mean_terminated_length": 542.7686767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.16426666666666667, "grad_norm": 0.005266482941806316, "learning_rate": 1.2777777777777779e-06, "loss": 0.0241, "num_tokens": 33401647.0, "reward": 1.081038475036621, "reward_std": 0.24052487313747406, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.7089519500732422, "rewards/format_reward_step": 0.9921875, "step": 154 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3764974372461438, "aux_distill/mean_u": 0.19954199441576917, "aux_distill/n_active_tok": 555.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.568870523415978, "calib/avg_num_step_conf": 8.7265625, "calib/ece": 0.19988379446640314, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.16600790513833993, "calib/gap": 0.055773691460055175, "calib/mean_conf": 0.6720577075098814, "calib/mu_c": 0.7011570247933884, "calib/mu_w": 0.6453833333333332, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1968403162055336, "calib/std_conf": 0.20734349417086043, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2756.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 504.30859375, "completions/mean_terminated_length": 506.28631591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.16533333333333333, "grad_norm": 0.006387793924659491, "learning_rate": 1.25e-06, "loss": 0.0366, "num_tokens": 33637966.0, "reward": 1.0752594470977783, "reward_std": 0.22296142578125, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6895813345909119, "rewards/format_reward_step": 0.98828125, "step": 155 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3770381882786751, "aux_distill/mean_u": 0.1989017791072949, "aux_distill/n_active_tok": 598.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6612547288776797, "calib/avg_num_step_conf": 9.36328125, "calib/ece": 0.1534845238095238, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.09523809523809523, "calib/gap": 0.12217446406052968, "calib/mean_conf": 0.6170710317460317, "calib/mu_c": 0.6800975409836066, "calib/mu_w": 0.557923076923077, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.1432142857142857, "calib/std_conf": 0.22878321806284102, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2122.0, "completions/max_terminated_length": 2122.0, "completions/mean_length": 579.515625, "completions/mean_terminated_length": 581.7882690429688, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.1664, "grad_norm": 0.005374233238399029, "learning_rate": 1.2222222222222223e-06, "loss": 0.0421, "num_tokens": 33891082.0, "reward": 1.0824484825134277, "reward_std": 0.2440761774778366, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.719584584236145, "rewards/format_reward_step": 0.96875, "step": 156 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3959145718254149, "aux_distill/mean_u": 0.2793759608083668, "aux_distill/n_active_tok": 662.40625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.596949221949222, "calib/avg_num_step_conf": 10.5, "calib/ece": 0.14449392712550607, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.1214574898785425, "calib/gap": 0.07183660933660929, "calib/mean_conf": 0.6466801619433198, "calib/mu_c": 0.6754729729729729, "calib/mu_w": 0.6036363636363636, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09599190283400812, "calib/std_conf": 0.21002870184418684, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 587.05078125, "completions/mean_terminated_length": 589.3529663085938, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.16746666666666668, "grad_norm": 0.005070784594863653, "learning_rate": 1.1944444444444446e-06, "loss": 0.1324, "num_tokens": 34145095.0, "reward": 1.1265051364898682, "reward_std": 0.24920831620693207, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.7178539037704468, "rewards/format_reward_step": 0.95703125, "step": 157 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38080530194565654, "aux_distill/mean_u": 0.2144786379452595, "aux_distill/n_active_tok": 595.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5020163831127914, "calib/avg_num_step_conf": 9.35546875, "calib/ece": 0.18814229249011852, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.17786561264822134, "calib/gap": 0.017797101449275377, "calib/mean_conf": 0.6803162055335968, "calib/mu_c": 0.6884057971014492, "calib/mu_w": 0.6706086956521738, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16150197628458496, "calib/std_conf": 0.22398453065900517, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1801.0, "completions/max_terminated_length": 1801.0, "completions/mean_length": 542.1484375, "completions/mean_terminated_length": 544.2745361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.16853333333333334, "grad_norm": 0.006110852584242821, "learning_rate": 1.1666666666666668e-06, "loss": 0.0714, "num_tokens": 34389125.0, "reward": 1.1058812141418457, "reward_std": 0.2672828137874603, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6844187378883362, "rewards/format_reward_step": 0.98828125, "step": 158 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36747160041704774, "aux_distill/mean_u": 0.18348273788386177, "aux_distill/n_active_tok": 580.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5950610221205188, "calib/avg_num_step_conf": 9.0625, "calib/ece": 0.174318253968254, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.14682539682539683, "calib/gap": 0.06862860411899308, "calib/mean_conf": 0.6717928571428572, "calib/mu_c": 0.7028391304347826, "calib/mu_w": 0.6342105263157896, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1492460317460318, "calib/std_conf": 0.2146011178681263, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2890.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 535.01953125, "completions/mean_terminated_length": 535.01953125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.1696, "grad_norm": 0.005810961127281189, "learning_rate": 1.138888888888889e-06, "loss": 0.0909, "num_tokens": 34630874.0, "reward": 1.118455171585083, "reward_std": 0.22580690681934357, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.713472843170166, "rewards/format_reward_step": 0.984375, "step": 159 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3862662399187684, "aux_distill/mean_u": 0.22606274958942982, "aux_distill/n_active_tok": 640.09375, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5912081339712919, "calib/avg_num_step_conf": 10.0, "calib/ece": 0.23280487804878053, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.16260162601626016, "calib/gap": 0.07370414673046244, "calib/mean_conf": 0.6848373983739838, "calib/mu_c": 0.7243859649122807, "calib/mu_w": 0.6506818181818183, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22711382113821144, "calib/std_conf": 0.21200498053605563, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2723.0, "completions/max_terminated_length": 2723.0, "completions/mean_length": 591.74609375, "completions/mean_terminated_length": 594.0667114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.17066666666666666, "grad_norm": 0.004924526903778315, "learning_rate": 1.111111111111111e-06, "loss": 0.0339, "num_tokens": 34887201.0, "reward": 1.0270812511444092, "reward_std": 0.30026695132255554, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6596312522888184, "rewards/format_reward_step": 0.94921875, "step": 160 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3662711353972554, "aux_distill/mean_u": 0.20840195328305358, "aux_distill/n_active_tok": 550.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.575003621613791, "calib/avg_num_step_conf": 8.703125, "calib/ece": 0.1333333333333333, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.13333333333333333, "calib/gap": 0.048624511082138056, "calib/mean_conf": 0.6710588235294118, "calib/mu_c": 0.6859322033898305, "calib/mu_w": 0.6373076923076925, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05513725490196077, "calib/std_conf": 0.20934270727401036, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1806.0, "completions/max_terminated_length": 1806.0, "completions/mean_length": 519.50390625, "completions/mean_terminated_length": 521.5411987304688, "completions/min_length": 0.0, "completions/min_terminated_length": 102.0, "epoch": 0.17173333333333332, "grad_norm": 0.005698539316654205, "learning_rate": 1.0833333333333335e-06, "loss": 0.0608, "num_tokens": 35124114.0, "reward": 1.2242445945739746, "reward_std": 0.2015002965927124, "rewards/accuracy_reward_step": 0.69140625, "rewards/final_brier_reward_step": 0.7609890699386597, "rewards/format_reward_step": 0.99609375, "step": 161 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3748155296780169, "aux_distill/mean_u": 0.1932356987573526, "aux_distill/n_active_tok": 531.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5960806428873537, "calib/avg_num_step_conf": 8.30859375, "calib/ece": 0.11890588235294117, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.15294117647058825, "calib/gap": 0.06314239390948828, "calib/mean_conf": 0.7045450980392156, "calib/mu_c": 0.7248497109826589, "calib/mu_w": 0.6617073170731707, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.07250980392156862, "calib/std_conf": 0.1852319503140515, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2561.0, "completions/max_terminated_length": 2561.0, "completions/mean_length": 497.8046875, "completions/mean_terminated_length": 497.8046875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.1728, "grad_norm": 0.005967023782432079, "learning_rate": 1.0555555555555557e-06, "loss": 0.0557, "num_tokens": 35355696.0, "reward": 1.2153667211532593, "reward_std": 0.2070452868938446, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7666709423065186, "rewards/format_reward_step": 0.98828125, "step": 162 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3876973898150027, "aux_distill/mean_u": 0.2564975633289124, "aux_distill/n_active_tok": 627.0625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.6148873783819806, "calib/avg_num_step_conf": 10.14453125, "calib/ece": 0.2239591836734694, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.15918367346938775, "calib/gap": 0.07605957616953207, "calib/mean_conf": 0.7071428571428572, "calib/mu_c": 0.7453278688524589, "calib/mu_w": 0.6692682926829269, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21657142857142858, "calib/std_conf": 0.19477459499930388, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2760.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 573.625, "completions/mean_terminated_length": 578.1417236328125, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.17386666666666667, "grad_norm": 0.00539444712921977, "learning_rate": 1.0277777777777777e-06, "loss": 0.05, "num_tokens": 35607376.0, "reward": 1.0547908544540405, "reward_std": 0.2592874765396118, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.675987958908081, "rewards/format_reward_step": 0.95703125, "step": 163 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38780549727380276, "aux_distill/mean_u": 0.20125896250529246, "aux_distill/n_active_tok": 601.59375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6642457719585378, "calib/avg_num_step_conf": 9.66796875, "calib/ece": 0.09895800000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.164, "calib/gap": 0.13103668848881622, "calib/mean_conf": 0.685602, "calib/mu_c": 0.7348717948717949, "calib/mu_w": 0.6038351063829787, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.08028000000000002, "calib/std_conf": 0.220712434167176, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2589.0, "completions/max_terminated_length": 2589.0, "completions/mean_length": 571.27734375, "completions/mean_terminated_length": 575.7755737304688, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.17493333333333333, "grad_norm": 0.005605064332485199, "learning_rate": 1.0000000000000002e-06, "loss": 0.0258, "num_tokens": 35859759.0, "reward": 1.1609622240066528, "reward_std": 0.20984989404678345, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.7477058172225952, "rewards/format_reward_step": 0.96484375, "step": 164 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41156348772346973, "aux_distill/mean_u": 0.2492245281580815, "aux_distill/n_active_tok": 574.5625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.568014027316353, "calib/avg_num_step_conf": 8.984375, "calib/ece": 0.1940694117647059, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.20784313725490197, "calib/gap": 0.06060826873385006, "calib/mean_conf": 0.693538431372549, "calib/mu_c": 0.723486046511628, "calib/mu_w": 0.6628777777777779, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19086274509803924, "calib/std_conf": 0.21402834816245078, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1605.0, "completions/max_terminated_length": 1605.0, "completions/mean_length": 554.39453125, "completions/mean_terminated_length": 556.5686645507812, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.176, "grad_norm": 0.005580270662903786, "learning_rate": 9.722222222222224e-07, "loss": 0.0407, "num_tokens": 36107260.0, "reward": 1.100243091583252, "reward_std": 0.2129260003566742, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6965799331665039, "rewards/format_reward_step": 0.99609375, "step": 165 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3936434802599251, "aux_distill/mean_u": 0.25069887011889913, "aux_distill/n_active_tok": 631.53125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6650324675324676, "calib/avg_num_step_conf": 9.91015625, "calib/ece": 0.1648344, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.24, "calib/gap": 0.13665610389610383, "calib/mean_conf": 0.7110856, "calib/mu_c": 0.7712142857142857, "calib/mu_w": 0.6345581818181819, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.15796000000000004, "calib/std_conf": 0.22916760092264352, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2842.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 622.55859375, "completions/mean_terminated_length": 622.55859375, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.17706666666666668, "grad_norm": 0.004876760300248861, "learning_rate": 9.444444444444445e-07, "loss": 0.0771, "num_tokens": 36372819.0, "reward": 1.1162844896316528, "reward_std": 0.23633524775505066, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.7208502292633057, "rewards/format_reward_step": 0.96484375, "step": 166 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3998649688437581, "aux_distill/mean_u": 0.266047704954752, "aux_distill/n_active_tok": 591.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5493181204428841, "calib/avg_num_step_conf": 9.24609375, "calib/ece": 0.16944031620553357, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2924901185770751, "calib/gap": 0.04313105590062116, "calib/mean_conf": 0.7466861660079053, "calib/mu_c": 0.7623701863354038, "calib/mu_w": 0.7192391304347826, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1398814229249012, "calib/std_conf": 0.21671540363402186, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 530.41015625, "completions/mean_terminated_length": 530.41015625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.17813333333333334, "grad_norm": 0.005584842525422573, "learning_rate": 9.166666666666666e-07, "loss": 0.0538, "num_tokens": 36614212.0, "reward": 1.1690298318862915, "reward_std": 0.2310115396976471, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7208719849586487, "rewards/format_reward_step": 0.98828125, "step": 167 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3931912127882242, "aux_distill/mean_u": 0.24273921144412486, "aux_distill/n_active_tok": 600.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6109868421052631, "calib/avg_num_step_conf": 9.703125, "calib/ece": 0.14272896825396825, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.24206349206349206, "calib/gap": 0.08523226315789478, "calib/mean_conf": 0.7120329365079365, "calib/mu_c": 0.7458552631578947, "calib/mu_w": 0.660623, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1257936507936508, "calib/std_conf": 0.22484954243370556, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2173.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 607.05859375, "completions/mean_terminated_length": 611.8385620117188, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.1792, "grad_norm": 0.005158184561878443, "learning_rate": 8.88888888888889e-07, "loss": 0.0627, "num_tokens": 36874291.0, "reward": 1.1490778923034668, "reward_std": 0.2613195478916168, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.723936915397644, "rewards/format_reward_step": 0.98046875, "step": 168 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3681354848667979, "aux_distill/mean_u": 0.20839590022768617, "aux_distill/n_active_tok": 556.25, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5548625083836353, "calib/avg_num_step_conf": 8.69921875, "calib/ece": 0.24605263157894747, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.30364372469635625, "calib/gap": 0.035121730382293936, "calib/mean_conf": 0.7590485829959515, "calib/mu_c": 0.7739788732394366, "calib/mu_w": 0.7388571428571427, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2151012145748989, "calib/std_conf": 0.22427603705609578, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2834.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 566.71875, "completions/mean_terminated_length": 573.4387817382812, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.18026666666666666, "grad_norm": 0.0056145042181015015, "learning_rate": 8.611111111111112e-07, "loss": 0.0222, "num_tokens": 37123555.0, "reward": 1.0919448137283325, "reward_std": 0.26493191719055176, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.664358377456665, "rewards/format_reward_step": 0.96484375, "step": 169 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3833466558717191, "aux_distill/mean_u": 0.22643499535792744, "aux_distill/n_active_tok": 613.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5134576948700866, "calib/avg_num_step_conf": 9.578125, "calib/ece": 0.2185335968379447, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.32806324110671936, "calib/gap": 0.018214856762158615, "calib/mean_conf": 0.7722173913043479, "calib/mu_c": 0.7790569620253165, "calib/mu_w": 0.7608421052631579, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18312252964426884, "calib/std_conf": 0.2049528726652741, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2661.0, "completions/max_terminated_length": 2661.0, "completions/mean_length": 587.5625, "completions/mean_terminated_length": 587.5625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 0.18133333333333335, "grad_norm": 0.005056075286120176, "learning_rate": 8.333333333333333e-07, "loss": 0.0434, "num_tokens": 37378123.0, "reward": 1.1505274772644043, "reward_std": 0.24166499078273773, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.6994924545288086, "rewards/format_reward_step": 0.984375, "step": 170 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35855999030172825, "aux_distill/mean_u": 0.21844478544140802, "aux_distill/n_active_tok": 546.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5903972474194558, "calib/avg_num_step_conf": 8.63671875, "calib/ece": 0.30744291338582685, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.29133858267716534, "calib/gap": 0.07547461370034403, "calib/mean_conf": 0.7531082677165355, "calib/mu_c": 0.794411304347826, "calib/mu_w": 0.718936690647482, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.3038976377952757, "calib/std_conf": 0.21654670017537028, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2927.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 546.68359375, "completions/mean_terminated_length": 548.8274536132812, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.1824, "grad_norm": 0.005854424089193344, "learning_rate": 8.055555555555557e-07, "loss": 0.0422, "num_tokens": 37624970.0, "reward": 1.0410816669464111, "reward_std": 0.2533574104309082, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6446633338928223, "rewards/format_reward_step": 0.98828125, "step": 171 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3715131198987365, "aux_distill/mean_u": 0.1825784954079074, "aux_distill/n_active_tok": 528.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6015790870488323, "calib/avg_num_step_conf": 8.26171875, "calib/ece": 0.2094071146245059, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4189723320158103, "calib/gap": 0.08736332271762215, "calib/mean_conf": 0.8106719367588934, "calib/mu_c": 0.8438216560509555, "calib/mu_w": 0.7564583333333333, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19976284584980233, "calib/std_conf": 0.19861363142530922, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2886.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 519.671875, "completions/mean_terminated_length": 519.671875, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.18346666666666667, "grad_norm": 0.005732356104999781, "learning_rate": 7.777777777777779e-07, "loss": 0.0639, "num_tokens": 37861358.0, "reward": 1.161544919013977, "reward_std": 0.2524621784687042, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7215273380279541, "rewards/format_reward_step": 0.98828125, "step": 172 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35221074521541595, "aux_distill/mean_u": 0.1902758039051572, "aux_distill/n_active_tok": 588.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5617761917586857, "calib/avg_num_step_conf": 9.19140625, "calib/ece": 0.21011269841269842, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.30158730158730157, "calib/gap": 0.047811096148666965, "calib/mean_conf": 0.7680619047619047, "calib/mu_c": 0.7858962025316456, "calib/mu_w": 0.7380851063829786, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.17559523809523814, "calib/std_conf": 0.2136894720231082, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2442.0, "completions/max_terminated_length": 2442.0, "completions/mean_length": 553.91796875, "completions/mean_terminated_length": 558.279541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.18453333333333333, "grad_norm": 0.006491128820925951, "learning_rate": 7.5e-07, "loss": 0.013, "num_tokens": 38106321.0, "reward": 1.1526883840560913, "reward_std": 0.27811115980148315, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7077205181121826, "rewards/format_reward_step": 0.98046875, "step": 173 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3911949987523258, "aux_distill/mean_u": 0.2356908682254667, "aux_distill/n_active_tok": 561.78125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.46328694068678455, "calib/avg_num_step_conf": 8.96875, "calib/ece": 0.3510068548387096, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.3588709677419355, "calib/gap": -0.0404008064516127, "calib/mean_conf": 0.7588318548387097, "calib/mu_c": 0.7386314516129033, "calib/mu_w": 0.779032258064516, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.3049193548387096, "calib/std_conf": 0.23482360760128473, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2890.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 593.02734375, "completions/mean_terminated_length": 597.6968383789062, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.1856, "grad_norm": 0.005600844509899616, "learning_rate": 7.222222222222222e-07, "loss": 0.0809, "num_tokens": 38362368.0, "reward": 1.0135700702667236, "reward_std": 0.30040162801742554, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5818275809288025, "rewards/format_reward_step": 0.9609375, "step": 174 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38108411943539977, "aux_distill/mean_u": 0.22307086978765725, "aux_distill/n_active_tok": 589.625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6455332342871908, "calib/avg_num_step_conf": 9.2109375, "calib/ece": 0.3225364, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.34, "calib/gap": 0.10556283831793811, "calib/mean_conf": 0.7571436, "calib/mu_c": 0.8149920353982301, "calib/mu_w": 0.709429197080292, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.31384, "calib/std_conf": 0.2222298836318824, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2844.0, "completions/max_terminated_length": 2844.0, "completions/mean_length": 586.3671875, "completions/mean_terminated_length": 588.6666870117188, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.18666666666666668, "grad_norm": 0.005803854204714298, "learning_rate": 6.944444444444446e-07, "loss": 0.0866, "num_tokens": 38618302.0, "reward": 1.0197913646697998, "reward_std": 0.29834282398223877, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6372389197349548, "rewards/format_reward_step": 0.9609375, "step": 175 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41542663518339396, "aux_distill/mean_u": 0.27843300225684847, "aux_distill/n_active_tok": 519.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5742283950617284, "calib/avg_num_step_conf": 8.08984375, "calib/ece": 0.3078478431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4, "calib/gap": 0.0673043518518519, "calib/mean_conf": 0.7817992156862745, "calib/mu_c": 0.8134718518518519, "calib/mu_w": 0.7461675, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2801176470588235, "calib/std_conf": 0.2326337950247559, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2417.0, "completions/max_terminated_length": 2417.0, "completions/mean_length": 552.08984375, "completions/mean_terminated_length": 552.08984375, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 0.18773333333333334, "grad_norm": 0.005393526051193476, "learning_rate": 6.666666666666667e-07, "loss": 0.0335, "num_tokens": 38863701.0, "reward": 1.09013032913208, "reward_std": 0.2410275936126709, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6607294082641602, "rewards/format_reward_step": 0.9921875, "step": 176 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38257669610902667, "aux_distill/mean_u": 0.20653966698340326, "aux_distill/n_active_tok": 523.875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6535557563242126, "calib/avg_num_step_conf": 8.1796875, "calib/ece": 0.2193596837944664, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.41106719367588934, "calib/gap": 0.12427387712958171, "calib/mean_conf": 0.7853043478260869, "calib/mu_c": 0.8363892617449663, "calib/mu_w": 0.7121153846153846, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.20786561264822134, "calib/std_conf": 0.22668207216011574, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2861.0, "completions/max_terminated_length": 2861.0, "completions/mean_length": 523.03125, "completions/mean_terminated_length": 527.1495971679688, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.1888, "grad_norm": 0.006122148595750332, "learning_rate": 6.388888888888889e-07, "loss": 0.0196, "num_tokens": 39101429.0, "reward": 1.1449565887451172, "reward_std": 0.26474517583847046, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7196007966995239, "rewards/format_reward_step": 0.98828125, "step": 177 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39593329932540655, "aux_distill/mean_u": 0.24124242091843112, "aux_distill/n_active_tok": 575.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6205788868600798, "calib/avg_num_step_conf": 8.98828125, "calib/ece": 0.2195551587301587, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.4365079365079365, "calib/gap": 0.07042708460133884, "calib/mean_conf": 0.8046511904761906, "calib/mu_c": 0.8306421383647797, "calib/mu_w": 0.7602150537634409, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.19662698412698412, "calib/std_conf": 0.21381766774115116, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2646.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 542.3046875, "completions/mean_terminated_length": 542.3046875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.18986666666666666, "grad_norm": 0.006403418257832527, "learning_rate": 6.111111111111112e-07, "loss": 0.0719, "num_tokens": 39346331.0, "reward": 1.15155029296875, "reward_std": 0.28153494000434875, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7054443359375, "rewards/format_reward_step": 0.9765625, "step": 178 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40228676423430443, "aux_distill/mean_u": 0.2207415109289261, "aux_distill/n_active_tok": 498.84375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5345121867959309, "calib/avg_num_step_conf": 7.80078125, "calib/ece": 0.25700524193548396, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.40725806451612906, "calib/gap": 0.021218126578821717, "calib/mean_conf": 0.8117850806451613, "calib/mu_c": 0.8200841059602649, "calib/mu_w": 0.7988659793814432, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.2299596774193549, "calib/std_conf": 0.20379758532788697, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1941.0, "completions/max_terminated_length": 1941.0, "completions/mean_length": 529.1640625, "completions/mean_terminated_length": 533.3306884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.19093333333333334, "grad_norm": 0.006466962397098541, "learning_rate": 5.833333333333334e-07, "loss": 0.0255, "num_tokens": 39588061.0, "reward": 1.1131529808044434, "reward_std": 0.30939981341362, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6677122712135315, "rewards/format_reward_step": 0.96875, "step": 179 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3644904945977032, "aux_distill/mean_u": 0.2252707458553707, "aux_distill/n_active_tok": 622.5, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5752621412803532, "calib/avg_num_step_conf": 9.95703125, "calib/ece": 0.21748987854251006, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.3481781376518219, "calib/gap": 0.07612858719646798, "calib/mean_conf": 0.7786234817813765, "calib/mu_c": 0.8082119205298013, "calib/mu_w": 0.7320833333333333, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.19238866396761128, "calib/std_conf": 0.21379293004794264, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2986.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 643.69921875, "completions/mean_terminated_length": 651.33203125, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.192, "grad_norm": 0.005174067337065935, "learning_rate": 5.555555555555555e-07, "loss": 0.0508, "num_tokens": 39856704.0, "reward": 1.1200428009033203, "reward_std": 0.2508864402770996, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6932105422019958, "rewards/format_reward_step": 0.95703125, "step": 180 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3895543343387544, "aux_distill/mean_u": 0.19215650873399412, "aux_distill/n_active_tok": 504.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5619111280875988, "calib/avg_num_step_conf": 8.0, "calib/ece": 0.34328047808764944, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.38645418326693226, "calib/gap": 0.037288349885408634, "calib/mean_conf": 0.800810358565737, "calib/mu_c": 0.8204201680672268, "calib/mu_w": 0.7831318181818182, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.33499362549800804, "calib/std_conf": 0.20024760303729527, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2805.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 530.27734375, "completions/mean_terminated_length": 532.3568725585938, "completions/min_length": 0.0, "completions/min_terminated_length": 213.0, "epoch": 0.19306666666666666, "grad_norm": 0.006468856241554022, "learning_rate": 5.277777777777779e-07, "loss": 0.0567, "num_tokens": 40098719.0, "reward": 1.0248982906341553, "reward_std": 0.2814429998397827, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6083903312683105, "rewards/format_reward_step": 0.9765625, "step": 181 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36977091105654836, "aux_distill/mean_u": 0.20276270446751285, "aux_distill/n_active_tok": 520.3125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6068609022556389, "calib/avg_num_step_conf": 8.25390625, "calib/ece": 0.27892204724409453, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4409448818897638, "calib/gap": 0.07826878446115282, "calib/mean_conf": 0.8070622047244094, "calib/mu_c": 0.8421907142857142, "calib/mu_w": 0.7639219298245614, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26740157480314963, "calib/std_conf": 0.21123016812436582, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1445.0, "completions/max_terminated_length": 1445.0, "completions/mean_length": 515.19921875, "completions/mean_terminated_length": 517.2196655273438, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.19413333333333332, "grad_norm": 0.0057126544415950775, "learning_rate": 5.000000000000001e-07, "loss": 0.0388, "num_tokens": 40336770.0, "reward": 1.104158639907837, "reward_std": 0.26528361439704895, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6731610298156738, "rewards/format_reward_step": 0.98828125, "step": 182 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3720056042075157, "aux_distill/mean_u": 0.22487873949963205, "aux_distill/n_active_tok": 632.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5904737903225807, "calib/avg_num_step_conf": 9.875, "calib/ece": 0.2851309523809524, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.376984126984127, "calib/gap": 0.09010005040322577, "calib/mean_conf": 0.7693134920634921, "calib/mu_c": 0.8136484374999999, "calib/mu_w": 0.7235483870967742, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.27325396825396825, "calib/std_conf": 0.2307191159242876, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2362.0, "completions/max_terminated_length": 2362.0, "completions/mean_length": 610.61328125, "completions/mean_terminated_length": 613.0078735351562, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.1952, "grad_norm": 0.005356924142688513, "learning_rate": 4.7222222222222226e-07, "loss": 0.0619, "num_tokens": 40599767.0, "reward": 1.0603950023651123, "reward_std": 0.3010280430316925, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6520398855209351, "rewards/format_reward_step": 0.96875, "step": 183 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3988386648707092, "aux_distill/mean_u": 0.2309760148236864, "aux_distill/n_active_tok": 556.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5913656069364162, "calib/avg_num_step_conf": 8.640625, "calib/ece": 0.16279525691699614, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.36363636363636365, "calib/gap": 0.05032471098265889, "calib/mean_conf": 0.7895367588932807, "calib/mu_c": 0.8054497109826588, "calib/mu_w": 0.7551249999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13426877470355741, "calib/std_conf": 0.19784592767953718, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2464.0, "completions/max_terminated_length": 2464.0, "completions/mean_length": 550.47265625, "completions/mean_terminated_length": 550.47265625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.19626666666666667, "grad_norm": 0.006214865017682314, "learning_rate": 4.444444444444445e-07, "loss": 0.1089, "num_tokens": 40845968.0, "reward": 1.2052152156829834, "reward_std": 0.2249186486005783, "rewards/accuracy_reward_step": 0.67578125, "rewards/final_brier_reward_step": 0.7463681697845459, "rewards/format_reward_step": 0.98828125, "step": 184 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37272774055600166, "aux_distill/mean_u": 0.20732307433212496, "aux_distill/n_active_tok": 603.03125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5939581667998934, "calib/avg_num_step_conf": 9.98828125, "calib/ece": 0.28278987854251014, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.4048582995951417, "calib/gap": 0.06050807354116705, "calib/mean_conf": 0.7759955465587044, "calib/mu_c": 0.8024525179856115, "calib/mu_w": 0.7419444444444444, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.24801619433198382, "calib/std_conf": 0.24001522037686593, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 570.33203125, "completions/mean_terminated_length": 581.6932373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.19733333333333333, "grad_norm": 0.005448437761515379, "learning_rate": 4.1666666666666667e-07, "loss": 0.0043, "num_tokens": 41098893.0, "reward": 1.0783536434173584, "reward_std": 0.28535914421081543, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.6528009176254272, "rewards/format_reward_step": 0.9609375, "step": 185 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40977200120687485, "aux_distill/mean_u": 0.2495710649067675, "aux_distill/n_active_tok": 578.375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5549559036461762, "calib/avg_num_step_conf": 9.26171875, "calib/ece": 0.24531285140562253, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.3654618473895582, "calib/gap": 0.044854528103198565, "calib/mean_conf": 0.7789441767068271, "calib/mu_c": 0.7982190140845071, "calib/mu_w": 0.7533644859813086, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.22698795180722897, "calib/std_conf": 0.2157856955449833, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2680.0, "completions/max_terminated_length": 2680.0, "completions/mean_length": 565.27734375, "completions/mean_terminated_length": 574.2500610351562, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.1984, "grad_norm": 0.005148930009454489, "learning_rate": 3.8888888888888895e-07, "loss": 0.0008, "num_tokens": 41348644.0, "reward": 1.090340495109558, "reward_std": 0.2756432890892029, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.661149799823761, "rewards/format_reward_step": 0.96484375, "step": 186 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3799622510559857, "aux_distill/mean_u": 0.21517597478940167, "aux_distill/n_active_tok": 598.03125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6202479338842976, "calib/avg_num_step_conf": 9.4296875, "calib/ece": 0.30274701195219134, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3745019920318725, "calib/gap": 0.07251358550540377, "calib/mean_conf": 0.7975717131474103, "calib/mu_c": 0.8325284615384616, "calib/mu_w": 0.7600148760330578, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2911952191235061, "calib/std_conf": 0.20689881747839342, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 578.2890625, "completions/mean_terminated_length": 580.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.19946666666666665, "grad_norm": 0.005258694291114807, "learning_rate": 3.611111111111111e-07, "loss": 0.0736, "num_tokens": 41598230.0, "reward": 1.0704033374786377, "reward_std": 0.2771027088165283, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6525255441665649, "rewards/format_reward_step": 0.98046875, "step": 187 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38288334896788, "aux_distill/mean_u": 0.2397077996049779, "aux_distill/n_active_tok": 601.59375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6624349635796046, "calib/avg_num_step_conf": 9.40234375, "calib/ece": 0.19652540322580647, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.3709677419354839, "calib/gap": 0.11033354838709686, "calib/mean_conf": 0.7934745967741935, "calib/mu_c": 0.8348496774193549, "calib/mu_w": 0.724516129032258, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18250000000000002, "calib/std_conf": 0.20560450633201344, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2599.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 624.4296875, "completions/mean_terminated_length": 634.34130859375, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.20053333333333334, "grad_norm": 0.004758658353239298, "learning_rate": 3.3333333333333335e-07, "loss": 0.0602, "num_tokens": 41862156.0, "reward": 1.1487858295440674, "reward_std": 0.24109750986099243, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7233529686927795, "rewards/format_reward_step": 0.96875, "step": 188 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38821121491491795, "aux_distill/mean_u": 0.22937579912574796, "aux_distill/n_active_tok": 518.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6888692115964843, "calib/avg_num_step_conf": 8.09765625, "calib/ece": 0.2161154150197629, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.4426877470355731, "calib/gap": 0.13102106782106793, "calib/mean_conf": 0.8089438735177866, "calib/mu_c": 0.8602129870129871, "calib/mu_w": 0.7291919191919192, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2081818181818182, "calib/std_conf": 0.20390016974333175, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 546.7890625, "completions/mean_terminated_length": 546.7890625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.2016, "grad_norm": 0.005810108035802841, "learning_rate": 3.055555555555556e-07, "loss": 0.0947, "num_tokens": 42109902.0, "reward": 1.1581063270568848, "reward_std": 0.22317633032798767, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.7302750945091248, "rewards/format_reward_step": 0.984375, "step": 189 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.370077112223953, "aux_distill/mean_u": 0.2316970306220341, "aux_distill/n_active_tok": 632.40625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6490683229813664, "calib/avg_num_step_conf": 9.875, "calib/ece": 0.15501976284584967, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.33992094861660077, "calib/gap": 0.11170807453416143, "calib/mean_conf": 0.7834782608695652, "calib/mu_c": 0.8240993788819876, "calib/mu_w": 0.7123913043478262, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15106719367588922, "calib/std_conf": 0.19828851828568378, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2046.0, "completions/max_terminated_length": 2046.0, "completions/mean_length": 614.49609375, "completions/mean_terminated_length": 616.9059448242188, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.20266666666666666, "grad_norm": 0.0051289028488099575, "learning_rate": 2.7777777777777776e-07, "loss": 0.0136, "num_tokens": 42372821.0, "reward": 1.1838111877441406, "reward_std": 0.21080797910690308, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7504351735115051, "rewards/format_reward_step": 0.98828125, "step": 190 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3612460242584348, "aux_distill/mean_u": 0.18409684871874585, "aux_distill/n_active_tok": 587.25, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5992965018716924, "calib/avg_num_step_conf": 9.17578125, "calib/ece": 0.27774297188755037, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.3815261044176707, "calib/gap": 0.09912056279850268, "calib/mean_conf": 0.781293172690763, "calib/mu_c": 0.8298582677165355, "calib/mu_w": 0.7307377049180328, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2744979919678716, "calib/std_conf": 0.21773484849829758, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2912.0, "completions/max_terminated_length": 2912.0, "completions/mean_length": 573.2890625, "completions/mean_terminated_length": 575.5372924804688, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.20373333333333332, "grad_norm": 0.005880712065845728, "learning_rate": 2.5000000000000004e-07, "loss": 0.0726, "num_tokens": 42623751.0, "reward": 1.0624706745147705, "reward_std": 0.24356943368911743, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6600974798202515, "rewards/format_reward_step": 0.96875, "step": 191 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3918271157890558, "aux_distill/mean_u": 0.2257468611779381, "aux_distill/n_active_tok": 514.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6300331125827815, "calib/avg_num_step_conf": 8.03515625, "calib/ece": 0.20690677290836654, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3784860557768924, "calib/gap": 0.106654728476821, "calib/mean_conf": 0.7816988047808765, "calib/mu_c": 0.8241907284768212, "calib/mu_w": 0.7175360000000002, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.19350597609561754, "calib/std_conf": 0.2273512524500433, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2626.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 557.2890625, "completions/mean_terminated_length": 561.6771850585938, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.2048, "grad_norm": 0.006296477280557156, "learning_rate": 2.2222222222222224e-07, "loss": 0.0647, "num_tokens": 42871393.0, "reward": 1.1417138576507568, "reward_std": 0.2670000195503235, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.7131150364875793, "rewards/format_reward_step": 0.98046875, "step": 192 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40630976390093565, "aux_distill/mean_u": 0.22802555206204686, "aux_distill/n_active_tok": 538.90625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6023408972352636, "calib/avg_num_step_conf": 8.66015625, "calib/ece": 0.2670236, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.46, "calib/gap": 0.048302217005738224, "calib/mean_conf": 0.8181764, "calib/mu_c": 0.8390429577464789, "calib/mu_w": 0.7907407407407406, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.25860000000000005, "calib/std_conf": 0.19366300452858826, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2099.0, "completions/max_terminated_length": 2099.0, "completions/mean_length": 521.734375, "completions/mean_terminated_length": 525.842529296875, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.20586666666666667, "grad_norm": 0.006823267322033644, "learning_rate": 1.9444444444444447e-07, "loss": 0.0735, "num_tokens": 43110669.0, "reward": 1.0947258472442627, "reward_std": 0.38915589451789856, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6621078252792358, "rewards/format_reward_step": 0.97265625, "step": 193 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37754127802327275, "aux_distill/mean_u": 0.2385847335013782, "aux_distill/n_active_tok": 527.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5573550121623352, "calib/avg_num_step_conf": 8.421875, "calib/ece": 0.2580699604743084, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.40711462450592883, "calib/gap": 0.04432505441044665, "calib/mean_conf": 0.8026415019762846, "calib/mu_c": 0.8213876712328766, "calib/mu_w": 0.7770626168224299, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24181818181818193, "calib/std_conf": 0.20781324405356888, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2675.0, "completions/max_terminated_length": 2675.0, "completions/mean_length": 520.0703125, "completions/mean_terminated_length": 522.10986328125, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.20693333333333333, "grad_norm": 0.006866298615932465, "learning_rate": 1.6666666666666668e-07, "loss": 0.0503, "num_tokens": 43349751.0, "reward": 1.1137315034866333, "reward_std": 0.2670598328113556, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.672775387763977, "rewards/format_reward_step": 0.984375, "step": 194 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39284872729331255, "aux_distill/mean_u": 0.25530160009173675, "aux_distill/n_active_tok": 561.0, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4998744507219084, "calib/avg_num_step_conf": 8.765625, "calib/ece": 0.2733596837944663, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.36363636363636365, "calib/gap": 0.01785122410546125, "calib/mean_conf": 0.7901185770750988, "calib/mu_c": 0.7984444444444444, "calib/mu_w": 0.7805932203389832, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26494071146245046, "calib/std_conf": 0.19763319361916326, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2951.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 582.671875, "completions/mean_terminated_length": 582.671875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.208, "grad_norm": 0.005913162138313055, "learning_rate": 1.3888888888888888e-07, "loss": 0.0545, "num_tokens": 43604899.0, "reward": 1.0780566930770874, "reward_std": 0.25560739636421204, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6443945169448853, "rewards/format_reward_step": 0.984375, "step": 195 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3867280255071819, "aux_distill/mean_u": 0.20433668531479132, "aux_distill/n_active_tok": 469.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5802331349206351, "calib/avg_num_step_conf": 7.3828125, "calib/ece": 0.2953945312499999, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.4921875, "calib/gap": 0.06552678571428583, "calib/mean_conf": 0.85133203125, "calib/mu_c": 0.8800000000000001, "calib/mu_w": 0.8144732142857143, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2921132812499999, "calib/std_conf": 0.16652546231432305, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1235.0, "completions/max_terminated_length": 1235.0, "completions/mean_length": 448.0, "completions/mean_terminated_length": 449.75689697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.20906666666666668, "grad_norm": 0.00626879557967186, "learning_rate": 1.1111111111111112e-07, "loss": 0.0276, "num_tokens": 43822131.0, "reward": 1.1187515258789062, "reward_std": 0.21417656540870667, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6750030517578125, "rewards/format_reward_step": 1.0, "step": 196 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3991149263456464, "aux_distill/mean_u": 0.24741979634781106, "aux_distill/n_active_tok": 582.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5721665721665722, "calib/avg_num_step_conf": 9.1015625, "calib/ece": 0.2724870078740157, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.44881889763779526, "calib/gap": 0.07266039816039827, "calib/mean_conf": 0.8078279527559056, "calib/mu_c": 0.839581118881119, "calib/mu_w": 0.7669207207207207, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25866141732283465, "calib/std_conf": 0.21488679432697524, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 559.3671875, "completions/mean_terminated_length": 561.560791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.21013333333333334, "grad_norm": 0.00560788344591856, "learning_rate": 8.333333333333334e-08, "loss": 0.0351, "num_tokens": 44070385.0, "reward": 1.11452054977417, "reward_std": 0.2765326201915741, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6782600283622742, "rewards/format_reward_step": 0.9921875, "step": 197 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3851682706736028, "aux_distill/mean_u": 0.1962447667898319, "aux_distill/n_active_tok": 536.71875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5855562784645412, "calib/avg_num_step_conf": 8.51953125, "calib/ece": 0.24864541832669326, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.38247011952191234, "calib/gap": 0.0758646714378659, "calib/mean_conf": 0.800996015936255, "calib/mu_c": 0.8330344827586207, "calib/mu_w": 0.7571698113207548, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2359760956175299, "calib/std_conf": 0.19730304109153482, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3025.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 526.3125, "completions/mean_terminated_length": 528.3765258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.2112, "grad_norm": 0.005777986254543066, "learning_rate": 5.555555555555556e-08, "loss": 0.0533, "num_tokens": 44310505.0, "reward": 1.1186888217926025, "reward_std": 0.29297205805778503, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6905027627944946, "rewards/format_reward_step": 0.98046875, "step": 198 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3839912237599492, "aux_distill/mean_u": 0.25348926264527655, "aux_distill/n_active_tok": 562.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6176393699758186, "calib/avg_num_step_conf": 8.8203125, "calib/ece": 0.25571079999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.428, "calib/gap": 0.09906385857133526, "calib/mean_conf": 0.7651692, "calib/mu_c": 0.8075685314685316, "calib/mu_w": 0.7085046728971963, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22443999999999997, "calib/std_conf": 0.2612843289050455, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3061.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 599.20703125, "completions/mean_terminated_length": 606.312255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.21226666666666666, "grad_norm": 0.0047719781287014484, "learning_rate": 2.777777777777778e-08, "loss": -0.0175, "num_tokens": 44568102.0, "reward": 1.1084496974945068, "reward_std": 0.30771604180336, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6817431449890137, "rewards/format_reward_step": 0.9765625, "step": 199 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3788956059142947, "aux_distill/mean_u": 0.21847873765485837, "aux_distill/n_active_tok": 494.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5257306626354245, "calib/avg_num_step_conf": 7.71875, "calib/ece": 0.24171921568627447, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.39215686274509803, "calib/gap": 0.04255623582766466, "calib/mean_conf": 0.7831435294117648, "calib/mu_c": 0.8011673469387756, "calib/mu_w": 0.7586111111111109, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22419607843137251, "calib/std_conf": 0.22645131673339491, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2081.0, "completions/max_terminated_length": 2081.0, "completions/mean_length": 545.921875, "completions/mean_terminated_length": 548.0628051757812, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.21333333333333335, "grad_norm": 0.005753261968493462, "learning_rate": 0.0, "loss": 0.0435, "num_tokens": 44815906.0, "reward": 1.1214730739593506, "reward_std": 0.23278102278709412, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6765400171279907, "rewards/format_reward_step": 0.9921875, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.06408213273854926, "train_runtime": 18422.6439, "train_samples_per_second": 2.779, "train_steps_per_second": 0.011 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 44815906, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }