{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.9022029798764449, "aux_distill/mean_u": 0.3108363672915008, "aux_distill/n_active_tok": 54.0, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.012633531354367733, "learning_rate": 2.5000000000000004e-07, "loss": 0.0691, "num_tokens": 264685.0, "reward": 0.045387499034404755, "reward_std": 0.09252828359603882, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.5604067397745032, "aux_distill/mean_u": 0.29356464889172007, "aux_distill/n_active_tok": 58.36842105263158, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.008606677874922752, "learning_rate": 5.000000000000001e-07, "loss": 0.0988, "num_tokens": 533467.0, "reward": 0.09099707007408142, "reward_std": 0.17441941797733307, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.6411322099821908, "aux_distill/mean_u": 0.3704558609972279, "aux_distill/n_active_tok": 37.142857142857146, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.423076923076923, "calib/avg_num_step_conf": 0.25390625, "calib/ece": 0.7649374999999999, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": -0.003410256410256385, "calib/mean_conf": 0.9324374999999999, "calib/mu_c": 0.9296666666666668, "calib/mu_w": 0.9330769230769231, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.0703125, "calib/nonempty_step_conf_rate": 0.0625, "calib/pce": 0.7549374999999999, "calib/std_conf": 0.10546502308229966, "calib/step_conf_rate": 0.0625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2998.0, "completions/max_terminated_length": 2998.0, "completions/mean_length": 577.6328125, "completions/mean_terminated_length": 651.4273071289062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.009327870793640614, "learning_rate": 7.5e-07, "loss": 0.0704, "num_tokens": 786597.0, "reward": 0.047834958881139755, "reward_std": 0.105415940284729, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.01363866776227951, "rewards/format_reward_step": 0.05859375, "step": 3 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6355656509598097, "aux_distill/mean_u": 0.32851766073949795, "aux_distill/n_active_tok": 54.75, "calib/answer_extract_rate": 0.08203125, "calib/avg_num_step_conf": 0.3203125, "calib/ece": 0.9392307692307692, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/mean_conf": 0.9392307692307693, "calib/mu_c": NaN, "calib/mu_w": 0.9392307692307693, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.9392307692307692, "calib/std_conf": 0.06498065255982423, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 705.89453125, "completions/mean_terminated_length": 772.2607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.006254538893699646, "learning_rate": 1.0000000000000002e-06, "loss": 0.0338, "num_tokens": 1073474.0, "reward": 0.018050584942102432, "reward_std": 0.04258500412106514, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.004851171746850014, "rewards/format_reward_step": 0.03125, "step": 4 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6282181943004782, "aux_distill/mean_u": 0.2853342183461234, "aux_distill/n_active_tok": 51.63636363636363, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.9090909090909091, "calib/avg_num_step_conf": 0.27734375, "calib/ece": 0.8525000000000003, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.0390625, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.04818181818181799, "calib/mean_conf": 0.9358333333333335, "calib/mu_c": 0.98, "calib/mu_w": 0.931818181818182, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.8525000000000003, "calib/std_conf": 0.05780114377953278, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3033.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 670.96484375, "completions/mean_terminated_length": 727.8262939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.007472812198102474, "learning_rate": 1.25e-06, "loss": 0.0364, "num_tokens": 1351929.0, "reward": 0.02683398313820362, "reward_std": 0.07185898721218109, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.006792968139052391, "rewards/format_reward_step": 0.0390625, "step": 5 }, { "aux_distill/lambda": 0.10000000000000002, "aux_distill/loss": 0.6285811681300402, "aux_distill/mean_u": 0.3202117609486028, "aux_distill/n_active_tok": 56.5, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.5357142857142856, "calib/avg_num_step_conf": 0.44140625, "calib/ece": 0.6455000000000001, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": 0.013095238095237938, "calib/mean_conf": 0.8975000000000002, "calib/mu_c": 0.9066666666666666, "calib/mu_w": 0.8935714285714287, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.12109375, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.6215, "calib/std_conf": 0.19531704994700286, "calib/step_conf_rate": 0.08984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 567.91015625, "completions/mean_terminated_length": 621.303466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0064, "grad_norm": 0.007908736355602741, "learning_rate": 1.5e-06, "loss": 0.0879, "num_tokens": 1603266.0, "reward": 0.0757259726524353, "reward_std": 0.16911853849887848, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.030358202755451202, "rewards/format_reward_step": 0.06640625, "step": 6 }, { "aux_distill/lambda": 0.1, "aux_distill/loss": 0.7187803174768176, "aux_distill/mean_u": 0.35720473139848385, "aux_distill/n_active_tok": 37.714285714285715, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.7181818181818183, "calib/avg_num_step_conf": 0.265625, "calib/ece": 0.535, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8125, "calib/gap": 0.18109090909090886, "calib/mean_conf": 0.8474999999999999, "calib/mu_c": 0.9719999999999999, "calib/mu_w": 0.790909090909091, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1015625, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.535, "calib/std_conf": 0.2674532669458348, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1015625, "completions/max_length": 3029.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 691.8046875, "completions/mean_terminated_length": 770.0086669921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.009156746789813042, "learning_rate": 1.75e-06, "loss": 0.0396, "num_tokens": 1887792.0, "reward": 0.05140624940395355, "reward_std": 0.13243496417999268, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.020781250670552254, "rewards/format_reward_step": 0.04296875, "step": 7 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.803349397248692, "aux_distill/mean_u": 0.3246804844449429, "aux_distill/n_active_tok": 51.833333333333336, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.575, "calib/avg_num_step_conf": 0.45703125, "calib/ece": 0.5033333333333333, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.8333333333333334, "calib/gap": 0.010749999999999815, "calib/mean_conf": 0.9477777777777777, "calib/mu_c": 0.95375, "calib/mu_w": 0.9430000000000002, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.13671875, "calib/nonempty_step_conf_rate": 0.09765625, "calib/pce": 0.5033333333333333, "calib/std_conf": 0.03866602809178958, "calib/step_conf_rate": 0.09765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2839.0, "completions/max_terminated_length": 2839.0, "completions/mean_length": 670.27734375, "completions/mean_terminated_length": 717.9539794921875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.01636502891778946, "learning_rate": 2.0000000000000003e-06, "loss": 0.0735, "num_tokens": 2165895.0, "reward": 0.0802062526345253, "reward_std": 0.13919153809547424, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.0315062515437603, "rewards/format_reward_step": 0.06640625, "step": 8 }, { "aux_distill/lambda": 0.09999999999999999, "aux_distill/loss": 0.6690852016210556, "aux_distill/mean_u": 0.4719379662396538, "aux_distill/n_active_tok": 43.7, "calib/answer_extract_rate": 0.0703125, "calib/avg_num_step_conf": 0.2109375, "calib/ece": 0.7616666666666667, "calib/final_conf_rate": 0.0234375, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/mean_conf": 0.7616666666666666, "calib/mu_c": NaN, "calib/mu_w": 0.7616666666666666, "calib/nonempty_final_conf_rate": 0.0234375, "calib/nonempty_reasoning_rate": 0.078125, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.7616666666666667, "calib/std_conf": 0.35248719440884974, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 659.15625, "completions/mean_terminated_length": 721.1282348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.006472648587077856, "learning_rate": 2.25e-06, "loss": 0.0353, "num_tokens": 2442175.0, "reward": 0.011277538724243641, "reward_std": 0.03189769759774208, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.0030238283798098564, "rewards/format_reward_step": 0.01953125, "step": 9 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.6920461853345236, "aux_distill/mean_u": 0.2160209553051777, "aux_distill/n_active_tok": 48.0, "calib/answer_extract_rate": 0.125, "calib/auroc": 0.3695652173913044, "calib/avg_num_step_conf": 0.421875, "calib/ece": 0.7415384615384615, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.0703125, "calib/frac_conf_gt_0.9": 0.7307692307692307, "calib/gap": -0.053043478260869636, "calib/mean_conf": 0.856923076923077, "calib/mu_c": 0.8099999999999999, "calib/mu_w": 0.8630434782608696, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.15234375, "calib/nonempty_step_conf_rate": 0.10546875, "calib/pce": 0.7415384615384615, "calib/std_conf": 0.26039569343551616, "calib/step_conf_rate": 0.10546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3024.0, "completions/max_terminated_length": 3024.0, "completions/mean_length": 626.609375, "completions/mean_terminated_length": 679.7118530273438, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.00994604080915451, "learning_rate": 2.5e-06, "loss": 0.0655, "num_tokens": 2709387.0, "reward": 0.051632028073072433, "reward_std": 0.11215013265609741, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.00951406266540289, "rewards/format_reward_step": 0.0703125, "step": 10 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.6356026422977448, "aux_distill/mean_u": 0.29512467347753385, "aux_distill/n_active_tok": 58.72, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.4682539682539683, "calib/avg_num_step_conf": 0.69921875, "calib/ece": 0.6884, "calib/final_conf_rate": 0.09765625, "calib/format_rate": 0.0859375, "calib/frac_conf_gt_0.9": 0.84, "calib/gap": -0.03428571428571414, "calib/mean_conf": 0.9404, "calib/mu_c": 0.9157142857142857, "calib/mu_w": 0.9499999999999998, "calib/nonempty_final_conf_rate": 0.09765625, "calib/nonempty_reasoning_rate": 0.1953125, "calib/nonempty_step_conf_rate": 0.15234375, "calib/pce": 0.6744, "calib/std_conf": 0.07345638161521433, "calib/step_conf_rate": 0.15234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2943.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 661.31640625, "completions/mean_terminated_length": 717.3601684570312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.01098595280200243, "learning_rate": 2.7500000000000004e-06, "loss": 0.0929, "num_tokens": 2983164.0, "reward": 0.09054765850305557, "reward_std": 0.18365205824375153, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.032657817006111145, "rewards/format_reward_step": 0.0859375, "step": 11 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.625461015701294, "aux_distill/mean_u": 0.3472341701960909, "aux_distill/n_active_tok": 50.88, "calib/answer_extract_rate": 0.19921875, "calib/auroc": 0.316, "calib/avg_num_step_conf": 0.625, "calib/ece": 0.6594285714285714, "calib/final_conf_rate": 0.13671875, "calib/format_rate": 0.1015625, "calib/frac_conf_gt_0.9": 0.8, "calib/gap": -0.03220000000000012, "calib/mean_conf": 0.884, "calib/mu_c": 0.861, "calib/mu_w": 0.8932000000000001, "calib/nonempty_final_conf_rate": 0.13671875, "calib/nonempty_reasoning_rate": 0.21875, "calib/nonempty_step_conf_rate": 0.13671875, "calib/pce": 0.6288571428571428, "calib/std_conf": 0.21212125912182533, "calib/step_conf_rate": 0.13671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3064.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 608.4296875, "completions/mean_terminated_length": 659.9915161132812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.01760835014283657, "learning_rate": 3e-06, "loss": 0.1187, "num_tokens": 3243098.0, "reward": 0.1129060611128807, "reward_std": 0.21075576543807983, "rewards/accuracy_reward_step": 0.0390625, "rewards/final_brier_reward_step": 0.04612461104989052, "rewards/format_reward_step": 0.1015625, "step": 12 }, { "aux_distill/lambda": 0.10000000000000003, "aux_distill/loss": 0.5849946607714114, "aux_distill/mean_u": 0.27308247281210013, "aux_distill/n_active_tok": 72.8695652173913, "calib/answer_extract_rate": 0.15625, "calib/auroc": 0.48214285714285715, "calib/avg_num_step_conf": 0.8203125, "calib/ece": 0.785, "calib/final_conf_rate": 0.125, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.71875, "calib/gap": 0.03142857142857136, "calib/mean_conf": 0.9099999999999999, "calib/mu_c": 0.9375, "calib/mu_w": 0.9060714285714286, "calib/nonempty_final_conf_rate": 0.125, "calib/nonempty_reasoning_rate": 0.23046875, "calib/nonempty_step_conf_rate": 0.17578125, "calib/pce": 0.785, "calib/std_conf": 0.15313392831113554, "calib/step_conf_rate": 0.17578125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0859375, "completions/max_length": 2971.0, "completions/max_terminated_length": 2971.0, "completions/mean_length": 593.6015625, "completions/mean_terminated_length": 649.4102783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.00963516067713499, "learning_rate": 3.2500000000000002e-06, "loss": 0.0712, "num_tokens": 3499652.0, "reward": 0.07337792962789536, "reward_std": 0.14072483777999878, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.02175585925579071, "rewards/format_reward_step": 0.09375, "step": 13 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5950426235795021, "aux_distill/mean_u": 0.2380964431578831, "aux_distill/n_active_tok": 60.75, "calib/answer_extract_rate": 0.20703125, "calib/auroc": 0.4095238095238095, "calib/avg_num_step_conf": 0.796875, "calib/ece": 0.6704878048780488, "calib/final_conf_rate": 0.16015625, "calib/format_rate": 0.125, "calib/frac_conf_gt_0.9": 0.6341463414634146, "calib/gap": -0.09133333333333349, "calib/mean_conf": 0.8046341463414632, "calib/mu_c": 0.7266666666666666, "calib/mu_w": 0.8180000000000001, "calib/nonempty_final_conf_rate": 0.16015625, "calib/nonempty_reasoning_rate": 0.26171875, "calib/nonempty_step_conf_rate": 0.1875, "calib/pce": 0.6643902439024391, "calib/std_conf": 0.2539166135543427, "calib/step_conf_rate": 0.1875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3047.0, "completions/max_terminated_length": 3047.0, "completions/mean_length": 584.20703125, "completions/mean_terminated_length": 633.7161254882812, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.014933333333333333, "grad_norm": 0.01152648776769638, "learning_rate": 3.5e-06, "loss": 0.0984, "num_tokens": 3754609.0, "reward": 0.10741367191076279, "reward_std": 0.19179916381835938, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.042952343821525574, "rewards/format_reward_step": 0.125, "step": 14 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5943296541427744, "aux_distill/mean_u": 0.35168131655490353, "aux_distill/n_active_tok": 101.96551724137932, "calib/answer_extract_rate": 0.26171875, "calib/auroc": 0.5046583850931676, "calib/avg_num_step_conf": 1.4296875, "calib/ece": 0.7773584905660378, "calib/final_conf_rate": 0.20703125, "calib/format_rate": 0.17578125, "calib/frac_conf_gt_0.9": 0.7547169811320755, "calib/gap": -0.04512422360248458, "calib/mean_conf": 0.8777358490566037, "calib/mu_c": 0.8385714285714286, "calib/mu_w": 0.8836956521739132, "calib/nonempty_final_conf_rate": 0.20703125, "calib/nonempty_reasoning_rate": 0.3125, "calib/nonempty_step_conf_rate": 0.25, "calib/pce": 0.7615094339622641, "calib/std_conf": 0.20207080311010325, "calib/step_conf_rate": 0.25, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2978.0, "completions/max_terminated_length": 2978.0, "completions/mean_length": 571.8984375, "completions/mean_terminated_length": 610.0250244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.012240985408425331, "learning_rate": 3.7500000000000005e-06, "loss": 0.145, "num_tokens": 4008895.0, "reward": 0.13784393668174744, "reward_std": 0.20476070046424866, "rewards/accuracy_reward_step": 0.02734375, "rewards/final_brier_reward_step": 0.045219141989946365, "rewards/format_reward_step": 0.17578125, "step": 15 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5600810666238109, "aux_distill/mean_u": 0.3982821064576951, "aux_distill/n_active_tok": 90.41935483870968, "calib/answer_extract_rate": 0.296875, "calib/auroc": 0.523109243697479, "calib/avg_num_step_conf": 1.3359375, "calib/ece": 0.5483203389830508, "calib/final_conf_rate": 0.23046875, "calib/format_rate": 0.203125, "calib/frac_conf_gt_0.9": 0.5084745762711864, "calib/gap": 0.028301540616246457, "calib/mean_conf": 0.7563237288135594, "calib/mu_c": 0.7764705882352941, "calib/mu_w": 0.7481690476190477, "calib/nonempty_final_conf_rate": 0.23046875, "calib/nonempty_reasoning_rate": 0.3828125, "calib/nonempty_step_conf_rate": 0.30078125, "calib/pce": 0.5082542372881356, "calib/std_conf": 0.30900877138501365, "calib/step_conf_rate": 0.30078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 2765.0, "completions/max_terminated_length": 2765.0, "completions/mean_length": 599.84375, "completions/mean_terminated_length": 626.7755126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.017066666666666667, "grad_norm": 0.01254871767014265, "learning_rate": 4.000000000000001e-06, "loss": 0.1227, "num_tokens": 4271303.0, "reward": 0.21736013889312744, "reward_std": 0.37067097425460815, "rewards/accuracy_reward_step": 0.06640625, "rewards/final_brier_reward_step": 0.09878277778625488, "rewards/format_reward_step": 0.203125, "step": 16 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.518870398402214, "aux_distill/mean_u": 0.30059635668181844, "aux_distill/n_active_tok": 112.71875, "calib/answer_extract_rate": 0.359375, "calib/auroc": 0.546875, "calib/avg_num_step_conf": 1.77734375, "calib/ece": 0.5814285714285714, "calib/final_conf_rate": 0.30078125, "calib/format_rate": 0.24609375, "calib/frac_conf_gt_0.9": 0.4155844155844156, "calib/gap": 0.050576923076923186, "calib/mean_conf": 0.711038961038961, "calib/mu_c": 0.7530769230769232, "calib/mu_w": 0.7025, "calib/nonempty_final_conf_rate": 0.30078125, "calib/nonempty_reasoning_rate": 0.46875, "calib/nonempty_step_conf_rate": 0.39453125, "calib/pce": 0.5618181818181818, "calib/std_conf": 0.32062270749271565, "calib/step_conf_rate": 0.39453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2799.0, "completions/max_terminated_length": 2799.0, "completions/mean_length": 539.28515625, "completions/mean_terminated_length": 565.807373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.011570603586733341, "learning_rate": 4.25e-06, "loss": 0.1764, "num_tokens": 4512888.0, "reward": 0.24242910742759705, "reward_std": 0.3743348717689514, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.1137644499540329, "rewards/format_reward_step": 0.24609375, "step": 17 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5079508516937494, "aux_distill/mean_u": 0.3073023040476281, "aux_distill/n_active_tok": 135.84375, "calib/answer_extract_rate": 0.39453125, "calib/auroc": 0.47294117647058825, "calib/avg_num_step_conf": 2.09765625, "calib/ece": 0.5014130434782609, "calib/final_conf_rate": 0.359375, "calib/format_rate": 0.3125, "calib/frac_conf_gt_0.9": 0.33695652173913043, "calib/gap": -0.026847058823529557, "calib/mean_conf": 0.6542391304347825, "calib/mu_c": 0.6323529411764705, "calib/mu_w": 0.6592, "calib/nonempty_final_conf_rate": 0.359375, "calib/nonempty_reasoning_rate": 0.4921875, "calib/nonempty_step_conf_rate": 0.4296875, "calib/pce": 0.48543478260869566, "calib/std_conf": 0.32658055033959604, "calib/step_conf_rate": 0.4296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03515625, "completions/max_length": 2965.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 593.0625, "completions/mean_terminated_length": 614.6720581054688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.011249953880906105, "learning_rate": 4.5e-06, "loss": 0.1976, "num_tokens": 4775432.0, "reward": 0.3123294711112976, "reward_std": 0.43367162346839905, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.1715339869260788, "rewards/format_reward_step": 0.3125, "step": 18 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.555460131727159, "aux_distill/mean_u": 0.351152560522153, "aux_distill/n_active_tok": 189.0625, "calib/answer_extract_rate": 0.6015625, "calib/auroc": 0.5336134453781513, "calib/avg_num_step_conf": 2.86328125, "calib/ece": 0.44894987468671677, "calib/final_conf_rate": 0.51953125, "calib/format_rate": 0.4140625, "calib/frac_conf_gt_0.9": 0.21052631578947367, "calib/gap": 0.017854341736694757, "calib/mean_conf": 0.5275964912280702, "calib/mu_c": 0.5435714285714286, "calib/mu_w": 0.5257170868347338, "calib/nonempty_final_conf_rate": 0.51953125, "calib/nonempty_reasoning_rate": 0.80078125, "calib/nonempty_step_conf_rate": 0.65625, "calib/pce": 0.43564160401002505, "calib/std_conf": 0.3389578727804567, "calib/step_conf_rate": 0.65625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 441.5, "completions/mean_terminated_length": 455.7419128417969, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.020266666666666665, "grad_norm": 0.012942974455654621, "learning_rate": 4.75e-06, "loss": 0.2163, "num_tokens": 4993216.0, "reward": 0.385407954454422, "reward_std": 0.459591805934906, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.23956595361232758, "rewards/format_reward_step": 0.4140625, "step": 19 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5594816422089934, "aux_distill/mean_u": 0.3344566962682378, "aux_distill/n_active_tok": 277.34375, "calib/answer_extract_rate": 0.74609375, "calib/auroc": 0.5335101679929265, "calib/avg_num_step_conf": 4.0703125, "calib/ece": 0.31548260869565214, "calib/final_conf_rate": 0.71875, "calib/format_rate": 0.64453125, "calib/frac_conf_gt_0.9": 0.1358695652173913, "calib/gap": 0.03500841732979665, "calib/mean_conf": 0.43625543478260864, "calib/mu_c": 0.46384358974358975, "calib/mu_w": 0.4288351724137931, "calib/nonempty_final_conf_rate": 0.71875, "calib/nonempty_reasoning_rate": 0.89453125, "calib/nonempty_step_conf_rate": 0.8359375, "calib/pce": 0.2698907608695652, "calib/std_conf": 0.33988152375181335, "calib/step_conf_rate": 0.8359375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2946.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 394.421875, "completions/mean_terminated_length": 395.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 16.0, "epoch": 0.021333333333333333, "grad_norm": 0.013653420843183994, "learning_rate": 5e-06, "loss": 0.1654, "num_tokens": 5199060.0, "reward": 0.7013116478919983, "reward_std": 0.555061936378479, "rewards/accuracy_reward_step": 0.16015625, "rewards/final_brier_reward_step": 0.4377795457839966, "rewards/format_reward_step": 0.64453125, "step": 20 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5351730575785041, "aux_distill/mean_u": 0.34520445152837353, "aux_distill/n_active_tok": 267.0, "calib/answer_extract_rate": 0.78515625, "calib/auroc": 0.4835216572504708, "calib/avg_num_step_conf": 4.0859375, "calib/ece": 0.34985621890547264, "calib/final_conf_rate": 0.78515625, "calib/format_rate": 0.72265625, "calib/frac_conf_gt_0.9": 0.1691542288557214, "calib/gap": -0.02221129943502831, "calib/mean_conf": 0.42572587064676615, "calib/mu_c": 0.4061666666666666, "calib/mu_w": 0.42837796610169493, "calib/nonempty_final_conf_rate": 0.78515625, "calib/nonempty_reasoning_rate": 0.9375, "calib/nonempty_step_conf_rate": 0.90234375, "calib/pce": 0.328089552238806, "calib/std_conf": 0.35222877223109705, "calib/step_conf_rate": 0.90234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3019.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 370.9453125, "completions/mean_terminated_length": 373.86614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.0224, "grad_norm": 0.013144408352673054, "learning_rate": 4.9722222222222224e-06, "loss": 0.2733, "num_tokens": 5396982.0, "reward": 0.7004989385604858, "reward_std": 0.48331907391548157, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.49084165692329407, "rewards/format_reward_step": 0.72265625, "step": 21 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5181534392759204, "aux_distill/mean_u": 0.36928614539473653, "aux_distill/n_active_tok": 273.3125, "calib/answer_extract_rate": 0.875, "calib/auroc": 0.5354075514503321, "calib/avg_num_step_conf": 4.1640625, "calib/ece": 0.3315731818181818, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.79296875, "calib/frac_conf_gt_0.9": 0.15454545454545454, "calib/gap": 0.03293814616755791, "calib/mean_conf": 0.42260863636363644, "calib/mu_c": 0.4506060606060606, "calib/mu_w": 0.4176679144385027, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.3020909090909091, "calib/std_conf": 0.340468878390795, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2813.0, "completions/max_terminated_length": 2813.0, "completions/mean_length": 298.42578125, "completions/mean_terminated_length": 299.5960998535156, "completions/min_length": 0.0, "completions/min_terminated_length": 15.0, "epoch": 0.023466666666666667, "grad_norm": 0.012655031867325306, "learning_rate": 4.944444444444445e-06, "loss": 0.0866, "num_tokens": 5575195.0, "reward": 0.8091020584106445, "reward_std": 0.4806508421897888, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.5517978072166443, "rewards/format_reward_step": 0.79296875, "step": 22 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5239614294841886, "aux_distill/mean_u": 0.32456098856974674, "aux_distill/n_active_tok": 268.9375, "calib/answer_extract_rate": 0.85546875, "calib/auroc": 0.48690932311621965, "calib/avg_num_step_conf": 4.1484375, "calib/ece": 0.2829214611872146, "calib/final_conf_rate": 0.85546875, "calib/format_rate": 0.80859375, "calib/frac_conf_gt_0.9": 0.0867579908675799, "calib/gap": -0.0038183908045976156, "calib/mean_conf": 0.36036712328767123, "calib/mu_c": 0.35733333333333345, "calib/mu_w": 0.36115172413793106, "calib/nonempty_final_conf_rate": 0.85546875, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.21890456621004564, "calib/std_conf": 0.3172734094783932, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2549.0, "completions/max_terminated_length": 2549.0, "completions/mean_length": 313.2109375, "completions/mean_terminated_length": 313.2109375, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.024533333333333334, "grad_norm": 0.011424724943935871, "learning_rate": 4.9166666666666665e-06, "loss": 0.1664, "num_tokens": 5759313.0, "reward": 0.867918848991394, "reward_std": 0.5167853236198425, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.5756815075874329, "rewards/format_reward_step": 0.80859375, "step": 23 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5008599422872066, "aux_distill/mean_u": 0.3815701796434009, "aux_distill/n_active_tok": 279.75, "calib/answer_extract_rate": 0.8828125, "calib/auroc": 0.46545454545454545, "calib/avg_num_step_conf": 4.35546875, "calib/ece": 0.332652466367713, "calib/final_conf_rate": 0.87109375, "calib/format_rate": 0.8125, "calib/frac_conf_gt_0.9": 0.1210762331838565, "calib/gap": -0.03694595959595959, "calib/mean_conf": 0.3912040358744394, "calib/mu_c": 0.35840000000000005, "calib/mu_w": 0.39534595959595964, "calib/nonempty_final_conf_rate": 0.87109375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.30587443946188336, "calib/std_conf": 0.3324925950580931, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2154.0, "completions/max_terminated_length": 2154.0, "completions/mean_length": 304.56640625, "completions/mean_terminated_length": 305.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0256, "grad_norm": 0.012385115027427673, "learning_rate": 4.888888888888889e-06, "loss": 0.1908, "num_tokens": 5941794.0, "reward": 0.7888060808181763, "reward_std": 0.4247359037399292, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.5697996616363525, "rewards/format_reward_step": 0.8125, "step": 24 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48966554924845695, "aux_distill/mean_u": 0.34171363359404794, "aux_distill/n_active_tok": 260.9375, "calib/answer_extract_rate": 0.8828125, "calib/auroc": 0.4392361111111111, "calib/avg_num_step_conf": 4.08203125, "calib/ece": 0.25293288288288285, "calib/final_conf_rate": 0.8671875, "calib/format_rate": 0.8046875, "calib/frac_conf_gt_0.9": 0.07657657657657657, "calib/gap": -0.07746406250000001, "calib/mean_conf": 0.29932927927927927, "calib/mu_c": 0.2323333333333333, "calib/mu_w": 0.3097973958333333, "calib/nonempty_final_conf_rate": 0.8671875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.2085635135135135, "calib/std_conf": 0.301357494295312, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2563.0, "completions/max_terminated_length": 2563.0, "completions/mean_length": 267.8515625, "completions/mean_terminated_length": 269.96063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.02666666666666667, "grad_norm": 0.011866779066622257, "learning_rate": 4.861111111111111e-06, "loss": 0.1395, "num_tokens": 6113588.0, "reward": 0.8221957683563232, "reward_std": 0.45238590240478516, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.5975164175033569, "rewards/format_reward_step": 0.8046875, "step": 25 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.508696723729372, "aux_distill/mean_u": 0.43063295485899017, "aux_distill/n_active_tok": 251.625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5144890260631001, "calib/avg_num_step_conf": 3.91796875, "calib/ece": 0.2462139917695473, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.053497942386831275, "calib/gap": 0.015416666666666634, "calib/mean_conf": 0.29962962962962963, "calib/mu_c": 0.31333333333333335, "calib/mu_w": 0.2979166666666667, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.2173662551440329, "calib/std_conf": 0.2912303026184558, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 713.0, "completions/max_terminated_length": 713.0, "completions/mean_length": 214.05859375, "completions/mean_terminated_length": 214.89805603027344, "completions/min_length": 0.0, "completions/min_terminated_length": 35.0, "epoch": 0.027733333333333332, "grad_norm": 0.01172387320548296, "learning_rate": 4.833333333333333e-06, "loss": 0.0078, "num_tokens": 6273627.0, "reward": 0.9250505566596985, "reward_std": 0.32336753606796265, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.7172887325286865, "rewards/format_reward_step": 0.9140625, "step": 26 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49942528642714024, "aux_distill/mean_u": 0.3542849390346178, "aux_distill/n_active_tok": 236.875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4569633995037221, "calib/avg_num_step_conf": 3.609375, "calib/ece": 0.21559665271966524, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.0041841004184100415, "calib/gap": -0.03214708436724567, "calib/mean_conf": 0.24733221757322174, "calib/mu_c": 0.21935483870967737, "calib/mu_w": 0.25150192307692304, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.16661087866108784, "calib/std_conf": 0.2538213858358564, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 987.0, "completions/max_terminated_length": 987.0, "completions/mean_length": 221.66015625, "completions/mean_terminated_length": 222.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 44.0, "epoch": 0.0288, "grad_norm": 0.012423808686435223, "learning_rate": 4.805555555555556e-06, "loss": 0.087, "num_tokens": 6435588.0, "reward": 0.9256427884101868, "reward_std": 0.36195361614227295, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.7145668268203735, "rewards/format_reward_step": 0.89453125, "step": 27 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47512149065732956, "aux_distill/mean_u": 0.34499709554861546, "aux_distill/n_active_tok": 214.125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5100290697674419, "calib/avg_num_step_conf": 3.36328125, "calib/ece": 0.18277894736842104, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.02834008097165992, "calib/gap": 0.0035413953488372396, "calib/mean_conf": 0.2619174089068826, "calib/mu_c": 0.265, "calib/mu_w": 0.2614586046511628, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.15757085020242914, "calib/std_conf": 0.24821793492654226, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1773.0, "completions/max_terminated_length": 1773.0, "completions/mean_length": 207.30078125, "completions/mean_terminated_length": 207.30078125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.029866666666666666, "grad_norm": 0.013800237327814102, "learning_rate": 4.777777777777778e-06, "loss": 0.082, "num_tokens": 6595601.0, "reward": 0.9762635231018066, "reward_std": 0.2939887046813965, "rewards/accuracy_reward_step": 0.12890625, "rewards/final_brier_reward_step": 0.7572145462036133, "rewards/format_reward_step": 0.9375, "step": 28 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5141920652240515, "aux_distill/mean_u": 0.3670624937354536, "aux_distill/n_active_tok": 226.0, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5522705314009662, "calib/avg_num_step_conf": 3.4765625, "calib/ece": 0.22223467741935485, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.024193548387096774, "calib/gap": 0.06157283091787441, "calib/mean_conf": 0.26212016129032256, "calib/mu_c": 0.3179826086956522, "calib/mu_w": 0.2564097777777778, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.19580645161290325, "calib/std_conf": 0.25058815185795774, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 691.0, "completions/max_terminated_length": 691.0, "completions/mean_length": 203.31640625, "completions/mean_terminated_length": 204.11373901367188, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.030933333333333334, "grad_norm": 0.011709311045706272, "learning_rate": 4.75e-06, "loss": 0.0294, "num_tokens": 6754778.0, "reward": 0.9514590501785278, "reward_std": 0.2994237542152405, "rewards/accuracy_reward_step": 0.08984375, "rewards/final_brier_reward_step": 0.7818243503570557, "rewards/format_reward_step": 0.94140625, "step": 29 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5036658011376858, "aux_distill/mean_u": 0.3104102361376659, "aux_distill/n_active_tok": 199.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6265198237885462, "calib/avg_num_step_conf": 3.078125, "calib/ece": 0.1319047619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.0725955947136564, "calib/mean_conf": 0.22420634920634924, "calib/mu_c": 0.2896, "calib/mu_w": 0.21700440528634363, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12845238095238096, "calib/std_conf": 0.2019914864409097, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 775.0, "completions/max_terminated_length": 775.0, "completions/mean_length": 170.47265625, "completions/mean_terminated_length": 171.1411895751953, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.032, "grad_norm": 0.012775780633091927, "learning_rate": 4.722222222222222e-06, "loss": 0.0336, "num_tokens": 6905403.0, "reward": 1.009130835533142, "reward_std": 0.21095620095729828, "rewards/accuracy_reward_step": 0.09765625, "rewards/final_brier_reward_step": 0.846386730670929, "rewards/format_reward_step": 0.9765625, "step": 30 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49115212354809046, "aux_distill/mean_u": 0.32121877010109623, "aux_distill/n_active_tok": 214.40625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5238933601609658, "calib/avg_num_step_conf": 3.328125, "calib/ece": 0.16140622406639005, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.029045643153526972, "calib/gap": 0.007251710261569483, "calib/mean_conf": 0.22216224066390042, "calib/mu_c": 0.22857142857142862, "calib/mu_w": 0.22131971830985914, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.1336929460580913, "calib/std_conf": 0.22215404253743912, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 600.0, "completions/max_terminated_length": 600.0, "completions/mean_length": 176.61328125, "completions/mean_terminated_length": 177.30589294433594, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.03306666666666667, "grad_norm": 0.013856728561222553, "learning_rate": 4.694444444444445e-06, "loss": 0.0632, "num_tokens": 7056528.0, "reward": 0.9648450613021851, "reward_std": 0.2989380657672882, "rewards/accuracy_reward_step": 0.11328125, "rewards/final_brier_reward_step": 0.7773463129997253, "rewards/format_reward_step": 0.92578125, "step": 31 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5210744366049767, "aux_distill/mean_u": 0.33302298813774467, "aux_distill/n_active_tok": 206.09375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5025111607142857, "calib/avg_num_step_conf": 3.140625, "calib/ece": 0.14899677419354837, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": -0.005524404761904772, "calib/mean_conf": 0.18540645161290323, "calib/mu_c": 0.18041666666666667, "calib/mu_w": 0.18594107142857144, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.11881451612903227, "calib/std_conf": 0.19145325031231744, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1639.0, "completions/max_terminated_length": 1639.0, "completions/mean_length": 175.671875, "completions/mean_terminated_length": 175.671875, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.034133333333333335, "grad_norm": 0.013500292785465717, "learning_rate": 4.666666666666667e-06, "loss": 0.0807, "num_tokens": 7208204.0, "reward": 0.9942401647567749, "reward_std": 0.23118172585964203, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.8361366391181946, "rewards/format_reward_step": 0.96484375, "step": 32 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.5026440424844623, "aux_distill/mean_u": 0.33876221840326814, "aux_distill/n_active_tok": 188.5, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5013993224333481, "calib/avg_num_step_conf": 2.9453125, "calib/ece": 0.12772, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.008, "calib/gap": 0.007213138901163707, "calib/mean_conf": 0.18852000000000002, "calib/mu_c": 0.19483870967741942, "calib/mu_w": 0.18762557077625572, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.09612000000000001, "calib/std_conf": 0.1957054153568572, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 166.40234375, "completions/mean_terminated_length": 166.40234375, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "epoch": 0.0352, "grad_norm": 0.013900885358452797, "learning_rate": 4.638888888888889e-06, "loss": 0.0665, "num_tokens": 7357675.0, "reward": 1.0091570615768433, "reward_std": 0.23295243084430695, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8151890635490417, "rewards/format_reward_step": 0.9609375, "step": 33 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.49193944688886404, "aux_distill/mean_u": 0.3246782472719872, "aux_distill/n_active_tok": 186.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4617512142471667, "calib/avg_num_step_conf": 2.92578125, "calib/ece": 0.15749999999999997, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.03515650296815975, "calib/mean_conf": 0.1901190476190476, "calib/mu_c": 0.15970588235294117, "calib/mu_w": 0.19486238532110092, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.10634920634920635, "calib/std_conf": 0.17530128745454196, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 497.0, "completions/max_terminated_length": 497.0, "completions/mean_length": 141.4296875, "completions/mean_terminated_length": 141.9843292236328, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.03626666666666667, "grad_norm": 0.015190926380455494, "learning_rate": 4.611111111111112e-06, "loss": 0.0128, "num_tokens": 7498993.0, "reward": 1.024768352508545, "reward_std": 0.22394582629203796, "rewards/accuracy_reward_step": 0.1328125, "rewards/final_brier_reward_step": 0.8151617050170898, "rewards/format_reward_step": 0.96875, "step": 34 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4921773290261626, "aux_distill/mean_u": 0.33049255483384, "aux_distill/n_active_tok": 191.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4441244239631337, "calib/avg_num_step_conf": 3.0, "calib/ece": 0.14278431372549016, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.037167338709677394, "calib/mean_conf": 0.17329411764705882, "calib/mu_c": 0.1406451612903226, "calib/mu_w": 0.17781249999999998, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09725490196078432, "calib/std_conf": 0.1807460892769947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 427.0, "completions/max_terminated_length": 427.0, "completions/mean_length": 144.484375, "completions/mean_terminated_length": 145.05099487304688, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.037333333333333336, "grad_norm": 0.01703733019530773, "learning_rate": 4.583333333333333e-06, "loss": 0.0132, "num_tokens": 7645237.0, "reward": 1.0347568988800049, "reward_std": 0.16965264081954956, "rewards/accuracy_reward_step": 0.12109375, "rewards/final_brier_reward_step": 0.8390449285507202, "rewards/format_reward_step": 0.98828125, "step": 35 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.489220367744565, "aux_distill/mean_u": 0.3329217827101059, "aux_distill/n_active_tok": 191.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47716346153846156, "calib/avg_num_step_conf": 3.0, "calib/ece": 0.13984251968503938, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.026258361204013342, "calib/mean_conf": 0.15976377952755905, "calib/mu_c": 0.1382608695652174, "calib/mu_w": 0.16451923076923075, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05925196850393701, "calib/std_conf": 0.1777725901431028, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1750.0, "completions/max_terminated_length": 1750.0, "completions/mean_length": 153.39453125, "completions/mean_terminated_length": 153.39453125, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.0384, "grad_norm": 0.012923480942845345, "learning_rate": 4.555555555555556e-06, "loss": 0.0928, "num_tokens": 7787218.0, "reward": 1.0785343647003174, "reward_std": 0.21512489020824432, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.8055062294006348, "rewards/format_reward_step": 0.9921875, "step": 36 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4732179343700409, "aux_distill/mean_u": 0.2754847840077296, "aux_distill/n_active_tok": 194.5625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5553020767778477, "calib/avg_num_step_conf": 3.015625, "calib/ece": 0.08060705882352943, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020079358086847063, "calib/mean_conf": 0.11319686274509805, "calib/mu_c": 0.13107142857142856, "calib/mu_w": 0.1109920704845815, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04200000000000001, "calib/std_conf": 0.1273543914035377, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 591.0, "completions/max_terminated_length": 591.0, "completions/mean_length": 151.015625, "completions/mean_terminated_length": 151.60784912109375, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.039466666666666664, "grad_norm": 0.01448907982558012, "learning_rate": 4.527777777777778e-06, "loss": 0.0758, "num_tokens": 7932974.0, "reward": 1.0467637777328491, "reward_std": 0.1426790952682495, "rewards/accuracy_reward_step": 0.109375, "rewards/final_brier_reward_step": 0.8825901746749878, "rewards/format_reward_step": 0.9921875, "step": 37 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.524141519330442, "aux_distill/mean_u": 0.37779598700656625, "aux_distill/n_active_tok": 219.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5299713584288053, "calib/avg_num_step_conf": 3.453125, "calib/ece": 0.11498039215686277, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010617839607201293, "calib/mean_conf": 0.09176470588235294, "calib/mu_c": 0.10042553191489362, "calib/mu_w": 0.08980769230769232, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.011215686274509803, "calib/std_conf": 0.10985384019371217, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 509.0, "completions/max_terminated_length": 509.0, "completions/mean_length": 159.3046875, "completions/mean_terminated_length": 159.92942810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 45.0, "epoch": 0.04053333333333333, "grad_norm": 0.013505329377949238, "learning_rate": 4.5e-06, "loss": 0.0494, "num_tokens": 8080644.0, "reward": 1.09230375289917, "reward_std": 0.17799054086208344, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.8252324461936951, "rewards/format_reward_step": 0.9921875, "step": 38 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4891305109485984, "aux_distill/mean_u": 0.30491045657396, "aux_distill/n_active_tok": 212.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.48369210697977827, "calib/avg_num_step_conf": 3.33203125, "calib/ece": 0.10676574803149609, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007805283757338563, "calib/mean_conf": 0.06669881889763779, "calib/mu_c": 0.07342857142857144, "calib/mu_w": 0.06562328767123288, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.017834645669291337, "calib/std_conf": 0.09622918446857423, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 703.0, "completions/max_terminated_length": 703.0, "completions/mean_length": 161.66796875, "completions/mean_terminated_length": 162.30197143554688, "completions/min_length": 0.0, "completions/min_terminated_length": 47.0, "epoch": 0.0416, "grad_norm": 0.014169767498970032, "learning_rate": 4.472222222222223e-06, "loss": 0.0422, "num_tokens": 8228119.0, "reward": 1.0637850761413574, "reward_std": 0.1445343792438507, "rewards/accuracy_reward_step": 0.140625, "rewards/final_brier_reward_step": 0.8580389022827148, "rewards/format_reward_step": 0.98828125, "step": 39 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47824777383357286, "aux_distill/mean_u": 0.2888125463333283, "aux_distill/n_active_tok": 234.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.506860465116279, "calib/avg_num_step_conf": 3.66796875, "calib/ece": 0.1305270588235294, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00796813953488372, "calib/mean_conf": 0.05253176470588236, "calib/mu_c": 0.059250000000000004, "calib/mu_w": 0.051281860465116284, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.013098039215686275, "calib/std_conf": 0.09134577386375445, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 470.0, "completions/max_terminated_length": 470.0, "completions/mean_length": 166.625, "completions/mean_terminated_length": 167.2784423828125, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.042666666666666665, "grad_norm": 0.012960164807736874, "learning_rate": 4.444444444444444e-06, "loss": 0.0551, "num_tokens": 8377535.0, "reward": 1.077946424484253, "reward_std": 0.1492486298084259, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8472990989685059, "rewards/format_reward_step": 0.99609375, "step": 40 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4979221150279045, "aux_distill/mean_u": 0.30807657441087855, "aux_distill/n_active_tok": 227.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.521875, "calib/avg_num_step_conf": 3.56640625, "calib/ece": 0.28539062499999995, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.005045454545454547, "calib/mean_conf": 0.03484375000000001, "calib/mu_c": 0.031375, "calib/mu_w": 0.03642045454545455, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0038671875, "calib/std_conf": 0.05553692992898959, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 457.0, "completions/max_terminated_length": 457.0, "completions/mean_length": 169.59765625, "completions/mean_terminated_length": 170.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 49.0, "epoch": 0.04373333333333333, "grad_norm": 0.014377506449818611, "learning_rate": 4.416666666666667e-06, "loss": 0.0596, "num_tokens": 8528200.0, "reward": 1.1560933589935303, "reward_std": 0.20653408765792847, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6949992179870605, "rewards/format_reward_step": 0.9921875, "step": 41 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4956301599740982, "aux_distill/mean_u": 0.32941970174270324, "aux_distill/n_active_tok": 229.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48813964610234334, "calib/avg_num_step_conf": 3.6015625, "calib/ece": 0.18374999999999997, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00886848397895744, "calib/mean_conf": 0.029062500000000005, "calib/mu_c": 0.02196078431372549, "calib/mu_w": 0.03082926829268293, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006796875, "calib/std_conf": 0.05621439845582269, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 426.0, "completions/max_terminated_length": 426.0, "completions/mean_length": 168.703125, "completions/mean_terminated_length": 169.36471557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 52.0, "epoch": 0.0448, "grad_norm": 0.013974346220493317, "learning_rate": 4.388888888888889e-06, "loss": 0.0691, "num_tokens": 8675756.0, "reward": 1.1019821166992188, "reward_std": 0.13847261667251587, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.8055265545845032, "rewards/format_reward_step": 1.0, "step": 42 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47923671174794436, "aux_distill/mean_u": 0.28819438981801476, "aux_distill/n_active_tok": 263.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6338541666666666, "calib/avg_num_step_conf": 4.171875, "calib/ece": 0.142558203125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.011254166666666666, "calib/mean_conf": 0.021504296875000002, "calib/mu_c": 0.031, "calib/mu_w": 0.019745833333333334, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00390625, "calib/std_conf": 0.04623921771585637, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 773.0, "completions/max_terminated_length": 773.0, "completions/mean_length": 197.10546875, "completions/mean_terminated_length": 197.87844848632812, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.04586666666666667, "grad_norm": 0.010071014054119587, "learning_rate": 4.361111111111112e-06, "loss": 0.0641, "num_tokens": 8831439.0, "reward": 1.0777781009674072, "reward_std": 0.115573450922966, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.8469623923301697, "rewards/format_reward_step": 0.99609375, "step": 43 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.47694727033376694, "aux_distill/mean_u": 0.32135310640373566, "aux_distill/n_active_tok": 328.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47371822490305904, "calib/avg_num_step_conf": 5.125, "calib/ece": 0.16690196078431377, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006278543731150367, "calib/mean_conf": 0.019058823529411767, "calib/mu_c": 0.013863636363636363, "calib/mu_w": 0.02014218009478673, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006705882352941176, "calib/std_conf": 0.054864328839493465, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2174.0, "completions/max_terminated_length": 2174.0, "completions/mean_length": 227.578125, "completions/mean_terminated_length": 227.578125, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.046933333333333334, "grad_norm": 0.01117937732487917, "learning_rate": 4.333333333333334e-06, "loss": 0.0676, "num_tokens": 8996019.0, "reward": 1.0788277387619019, "reward_std": 0.14245754480361938, "rewards/accuracy_reward_step": 0.171875, "rewards/final_brier_reward_step": 0.8217179775238037, "rewards/format_reward_step": 0.9921875, "step": 44 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4636860592290759, "aux_distill/mean_u": 0.25446549858857836, "aux_distill/n_active_tok": 304.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4781467779402908, "calib/avg_num_step_conf": 4.75, "calib/ece": 0.2226171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0015082164673492198, "calib/mean_conf": 0.0098046875, "calib/mu_c": 0.00864406779661017, "calib/mu_w": 0.01015228426395939, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0009765625, "calib/std_conf": 0.016804277075415763, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 627.0, "completions/max_terminated_length": 627.0, "completions/mean_length": 219.796875, "completions/mean_terminated_length": 220.65884399414062, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.048, "grad_norm": 0.009709659963846207, "learning_rate": 4.305555555555556e-06, "loss": 0.042, "num_tokens": 9157335.0, "reward": 1.1131341457366943, "reward_std": 0.14094233512878418, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.7692371010780334, "rewards/format_reward_step": 0.99609375, "step": 45 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.48097407817840576, "aux_distill/mean_u": 0.3231606234081447, "aux_distill/n_active_tok": 330.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48965356893745643, "calib/avg_num_step_conf": 5.18359375, "calib/ece": 0.265234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0017329303262807105, "calib/mean_conf": 0.00546875, "calib/mu_c": 0.004202898550724638, "calib/mu_w": 0.005935828877005348, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0005859375, "calib/std_conf": 0.012708226604743088, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 758.0, "completions/max_terminated_length": 758.0, "completions/mean_length": 240.29296875, "completions/mean_terminated_length": 241.2353057861328, "completions/min_length": 0.0, "completions/min_terminated_length": 50.0, "epoch": 0.04906666666666667, "grad_norm": 0.008790974505245686, "learning_rate": 4.277777777777778e-06, "loss": 0.0275, "num_tokens": 9323618.0, "reward": 1.1358027458190918, "reward_std": 0.12116257846355438, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.7325429916381836, "rewards/format_reward_step": 1.0, "step": 46 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4657612703740597, "aux_distill/mean_u": 0.2934879503928806, "aux_distill/n_active_tok": 380.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.3742272727272727, "calib/avg_num_step_conf": 5.96484375, "calib/ece": 0.21090196078431375, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004786363636363637, "calib/mean_conf": 0.00611764705882353, "calib/mu_c": 0.0023636363636363638, "calib/mu_w": 0.007150000000000001, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0006666666666666668, "calib/std_conf": 0.0137849691641231, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 705.0, "completions/max_terminated_length": 705.0, "completions/mean_length": 259.48828125, "completions/mean_terminated_length": 260.5058898925781, "completions/min_length": 0.0, "completions/min_terminated_length": 37.0, "epoch": 0.050133333333333335, "grad_norm": 0.009199056774377823, "learning_rate": 4.25e-06, "loss": 0.0573, "num_tokens": 9496023.0, "reward": 1.0960979461669922, "reward_std": 0.16379356384277344, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.7742273211479187, "rewards/format_reward_step": 0.98828125, "step": 47 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46220773458480835, "aux_distill/mean_u": 0.27937701966110146, "aux_distill/n_active_tok": 376.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.516267648864334, "calib/avg_num_step_conf": 5.890625, "calib/ece": 0.2817391304347826, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0006606813996316765, "calib/mean_conf": 0.004110671936758893, "calib/mu_c": 0.004583333333333333, "calib/mu_w": 0.003922651933701657, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0006324110671936759, "calib/std_conf": 0.013025499646864886, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2414.0, "completions/max_terminated_length": 2414.0, "completions/mean_length": 266.04296875, "completions/mean_terminated_length": 266.04296875, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.0512, "grad_norm": 0.00915578380227089, "learning_rate": 4.222222222222223e-06, "loss": 0.1119, "num_tokens": 9667818.0, "reward": 1.1261968612670898, "reward_std": 0.1674359142780304, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7055187225341797, "rewards/format_reward_step": 0.984375, "step": 48 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4780575539916754, "aux_distill/mean_u": 0.3032898873526519, "aux_distill/n_active_tok": 370.03125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.460891197775664, "calib/avg_num_step_conf": 5.7734375, "calib/ece": 0.3107539682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0006694958659544891, "calib/mean_conf": 0.0027380952380952383, "calib/mu_c": 0.002278481012658228, "calib/mu_w": 0.002947976878612717, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.007403000862400835, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2160.0, "completions/max_terminated_length": 2160.0, "completions/mean_length": 269.3359375, "completions/mean_terminated_length": 269.3359375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.05226666666666667, "grad_norm": 0.009693208150565624, "learning_rate": 4.194444444444445e-06, "loss": 0.122, "num_tokens": 9841304.0, "reward": 1.1432504653930664, "reward_std": 0.17814280092716217, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6771261692047119, "rewards/format_reward_step": 0.984375, "step": 49 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4636885020881891, "aux_distill/mean_u": 0.277049195947932, "aux_distill/n_active_tok": 394.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49481781376518214, "calib/avg_num_step_conf": 6.1640625, "calib/ece": 0.2532941176470588, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002995951417004048, "calib/mean_conf": 0.0016078431372549022, "calib/mu_c": 0.0013846153846153847, "calib/mu_w": 0.0016842105263157896, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004266046667756052, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2235.0, "completions/max_terminated_length": 2235.0, "completions/mean_length": 305.46875, "completions/mean_terminated_length": 305.46875, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.05333333333333334, "grad_norm": 0.007432255428284407, "learning_rate": 4.166666666666667e-06, "loss": 0.077, "num_tokens": 10024864.0, "reward": 1.1233880519866943, "reward_std": 0.13776543736457825, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.7428699135780334, "rewards/format_reward_step": 0.99609375, "step": 50 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.44833634700626135, "aux_distill/mean_u": 0.2464335719181652, "aux_distill/n_active_tok": 393.0625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4710574113559188, "calib/avg_num_step_conf": 6.17578125, "calib/ece": 0.2601953125, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008505093579720445, "calib/mean_conf": 0.0015234375, "calib/mu_c": 0.0008955223880597015, "calib/mu_w": 0.001746031746031746, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004721071190269615, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1022.0, "completions/max_terminated_length": 1022.0, "completions/mean_length": 270.13671875, "completions/mean_terminated_length": 271.19610595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.0544, "grad_norm": 0.008282770402729511, "learning_rate": 4.138888888888889e-06, "loss": 0.0455, "num_tokens": 10203315.0, "reward": 1.131081461906433, "reward_std": 0.13202789425849915, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7387253642082214, "rewards/format_reward_step": 1.0, "step": 51 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.46477444563061, "aux_distill/mean_u": 0.2906417075283816, "aux_distill/n_active_tok": 357.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4778928571428571, "calib/avg_num_step_conf": 5.65625, "calib/ece": 0.3128627450980392, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0007107142857142858, "calib/mean_conf": 0.0008627450980392157, "calib/mu_c": 0.000375, "calib/mu_w": 0.0010857142857142858, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.004606132348243458, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 923.0, "completions/max_terminated_length": 923.0, "completions/mean_length": 264.53515625, "completions/mean_terminated_length": 265.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.055466666666666664, "grad_norm": 0.010578352957963943, "learning_rate": 4.111111111111111e-06, "loss": 0.0625, "num_tokens": 10378988.0, "reward": 1.1524499654769897, "reward_std": 0.18429045379161835, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6838062405586243, "rewards/format_reward_step": 0.99609375, "step": 52 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4559667222201824, "aux_distill/mean_u": 0.2902862957850312, "aux_distill/n_active_tok": 388.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.481833910034602, "calib/avg_num_step_conf": 6.0703125, "calib/ece": 0.3993725490196079, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005555555555555557, "calib/mean_conf": 0.0006274509803921569, "calib/mu_c": 0.0002941176470588235, "calib/mu_w": 0.0008496732026143792, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003003010152012023, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2700.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 298.67578125, "completions/mean_terminated_length": 298.67578125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.05653333333333333, "grad_norm": 0.008898251689970493, "learning_rate": 4.083333333333334e-06, "loss": 0.0719, "num_tokens": 10561273.0, "reward": 1.195425033569336, "reward_std": 0.18935687839984894, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5978812575340271, "rewards/format_reward_step": 0.99609375, "step": 53 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4361731903627515, "aux_distill/mean_u": 0.22795934520457617, "aux_distill/n_active_tok": 370.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5071147725145104, "calib/avg_num_step_conf": 5.89453125, "calib/ece": 0.4253125, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0001422954502902078, "calib/mean_conf": 0.00046875000000000004, "calib/mu_c": 0.0005504587155963302, "calib/mu_w": 0.0004081632653061224, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0021137108216357317, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 958.0, "completions/max_terminated_length": 958.0, "completions/mean_length": 269.8671875, "completions/mean_terminated_length": 270.9255065917969, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.0576, "grad_norm": 0.007963367737829685, "learning_rate": 4.055555555555556e-06, "loss": 0.0373, "num_tokens": 10736591.0, "reward": 1.2111694812774658, "reward_std": 0.13964581489562988, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5746828317642212, "rewards/format_reward_step": 0.99609375, "step": 54 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4443762181326747, "aux_distill/mean_u": 0.27825471713899175, "aux_distill/n_active_tok": 381.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5139276877643867, "calib/avg_num_step_conf": 5.98828125, "calib/ece": 0.2632283464566929, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.000670444568600846, "calib/mean_conf": 0.0005511811023622047, "calib/mu_c": 0.001044776119402985, "calib/mu_w": 0.000374331550802139, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0032741138403609114, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2341.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 282.6328125, "completions/mean_terminated_length": 283.7411804199219, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.058666666666666666, "grad_norm": 0.009071978740394115, "learning_rate": 4.027777777777779e-06, "loss": 0.08, "num_tokens": 10916769.0, "reward": 1.1233148574829102, "reward_std": 0.1690075397491455, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.7310047149658203, "rewards/format_reward_step": 0.9921875, "step": 55 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4458486130461097, "aux_distill/mean_u": 0.28278279798028116, "aux_distill/n_active_tok": 379.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5131971051511282, "calib/avg_num_step_conf": 5.9296875, "calib/ece": 0.3173333333333333, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002639421030225628, "calib/mean_conf": 0.00031372549019607844, "calib/mu_c": 0.0004938271604938272, "calib/mu_w": 0.00022988505747126436, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0017432243741876759, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 934.0, "completions/max_terminated_length": 934.0, "completions/mean_length": 288.24609375, "completions/mean_terminated_length": 289.3764953613281, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.05973333333333333, "grad_norm": 0.007802541833370924, "learning_rate": 4.000000000000001e-06, "loss": 0.0256, "num_tokens": 11097400.0, "reward": 1.154451608657837, "reward_std": 0.17325270175933838, "rewards/accuracy_reward_step": 0.31640625, "rewards/final_brier_reward_step": 0.6799968481063843, "rewards/format_reward_step": 0.99609375, "step": 56 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43617555499076843, "aux_distill/mean_u": 0.21965836460819538, "aux_distill/n_active_tok": 389.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5176817940552018, "calib/avg_num_step_conf": 6.0859375, "calib/ece": 0.37920948616600786, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00045713906581740975, "calib/mean_conf": 0.00023715415019762845, "calib/mu_c": 0.0005208333333333333, "calib/mu_w": 6.369426751592357e-05, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001762331763605372, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1835.0, "completions/max_terminated_length": 1835.0, "completions/mean_length": 297.125, "completions/mean_terminated_length": 298.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.0608, "grad_norm": 0.007988800294697285, "learning_rate": 3.972222222222223e-06, "loss": 0.0666, "num_tokens": 11280256.0, "reward": 1.17597496509552, "reward_std": 0.19168786704540253, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6136687397956848, "rewards/format_reward_step": 0.98828125, "step": 57 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42793796211481094, "aux_distill/mean_u": 0.24425906299334496, "aux_distill/n_active_tok": 380.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49844444444444447, "calib/avg_num_step_conf": 5.94140625, "calib/ece": 0.2938823529411765, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0002333333333333333, "calib/mean_conf": 0.0002352941176470588, "calib/mu_c": 0.00039999999999999996, "calib/mu_w": 0.00016666666666666666, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002156506209390983, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1794.0, "completions/max_terminated_length": 1794.0, "completions/mean_length": 297.2578125, "completions/mean_terminated_length": 297.2578125, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.06186666666666667, "grad_norm": 0.007588682230561972, "learning_rate": 3.944444444444445e-06, "loss": 0.0745, "num_tokens": 11462674.0, "reward": 1.142693042755127, "reward_std": 0.1538347750902176, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7033547163009644, "rewards/format_reward_step": 0.99609375, "step": 58 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4090555151924491, "aux_distill/mean_u": 0.22674918349077935, "aux_distill/n_active_tok": 356.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5060975609756098, "calib/avg_num_step_conf": 5.60546875, "calib/ece": 0.320234375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00024390243902439024, "calib/mean_conf": 7.8125e-05, "calib/mu_c": 0.00024390243902439024, "calib/mu_w": 0.0, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001247556204896196, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 905.0, "completions/max_terminated_length": 905.0, "completions/mean_length": 284.3984375, "completions/mean_terminated_length": 285.51373291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.06293333333333333, "grad_norm": 0.00699743302538991, "learning_rate": 3.916666666666667e-06, "loss": 0.0498, "num_tokens": 11641728.0, "reward": 1.1602336168289185, "reward_std": 0.14041298627853394, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6798421740531921, "rewards/format_reward_step": 1.0, "step": 59 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4456522464752197, "aux_distill/mean_u": 0.2441319979642522, "aux_distill/n_active_tok": 359.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5079131268840779, "calib/avg_num_step_conf": 5.62109375, "calib/ece": 0.3514624505928854, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00037887092354069613, "calib/mean_conf": 0.00031620553359683795, "calib/mu_c": 0.0005617977528089888, "calib/mu_w": 0.00018292682926829268, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002154784690978499, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2042.0, "completions/max_terminated_length": 2042.0, "completions/mean_length": 303.05078125, "completions/mean_terminated_length": 303.05078125, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.064, "grad_norm": 0.0078499810770154, "learning_rate": 3.88888888888889e-06, "loss": 0.1102, "num_tokens": 11828165.0, "reward": 1.1623023748397827, "reward_std": 0.19147999584674835, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6410109996795654, "rewards/format_reward_step": 0.98828125, "step": 60 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4245549370534718, "aux_distill/mean_u": 0.21719526107186932, "aux_distill/n_active_tok": 360.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5060566931070528, "calib/avg_num_step_conf": 5.62890625, "calib/ece": 0.4565625, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00023365922646498185, "calib/mean_conf": 0.00046875000000000004, "calib/mu_c": 0.0003418803418803419, "calib/mu_w": 0.0005755395683453237, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0033910210022204228, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 992.0, "completions/max_terminated_length": 992.0, "completions/mean_length": 281.19921875, "completions/mean_terminated_length": 282.3019714355469, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.06506666666666666, "grad_norm": 0.008309507742524147, "learning_rate": 3.861111111111112e-06, "loss": 0.0503, "num_tokens": 12004216.0, "reward": 1.228666067123413, "reward_std": 0.13236938416957855, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5432695150375366, "rewards/format_reward_step": 1.0, "step": 61 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4038896760903299, "aux_distill/mean_u": 0.21998769765356624, "aux_distill/n_active_tok": 381.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.505661231884058, "calib/avg_num_step_conf": 5.98828125, "calib/ece": 0.2809375, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -4.830917874396138e-05, "calib/mean_conf": 0.0003125, "calib/mu_c": 0.0002777777777777778, "calib/mu_w": 0.00032608695652173916, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.0027775607554111218, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 888.0, "completions/max_terminated_length": 888.0, "completions/mean_length": 312.54296875, "completions/mean_terminated_length": 313.7686462402344, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.06613333333333334, "grad_norm": 0.006524847354739904, "learning_rate": 3.833333333333334e-06, "loss": 0.0302, "num_tokens": 12191307.0, "reward": 1.1367928981781006, "reward_std": 0.15469135344028473, "rewards/accuracy_reward_step": 0.28125, "rewards/final_brier_reward_step": 0.7149921655654907, "rewards/format_reward_step": 0.99609375, "step": 62 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43727352656424046, "aux_distill/mean_u": 0.26170699023718325, "aux_distill/n_active_tok": 403.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5011460931706331, "calib/avg_num_step_conf": 6.31640625, "calib/ece": 0.3579527559055118, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 2.292186341266094e-05, "calib/mean_conf": 0.00031496062992125983, "calib/mu_c": 0.00032967032967032967, "calib/mu_w": 0.00030674846625766873, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001746541182111719, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2360.0, "completions/max_terminated_length": 2360.0, "completions/mean_length": 331.5703125, "completions/mean_terminated_length": 332.87060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0672, "grad_norm": 0.007350383326411247, "learning_rate": 3.8055555555555556e-06, "loss": 0.0818, "num_tokens": 12384829.0, "reward": 1.1700375080108643, "reward_std": 0.19835248589515686, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6369500160217285, "rewards/format_reward_step": 0.9921875, "step": 63 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4085767618380487, "aux_distill/mean_u": 0.21745177327263837, "aux_distill/n_active_tok": 369.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5036487005505057, "calib/avg_num_step_conf": 5.76953125, "calib/ece": 0.4226086956521739, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -6.20919216489566e-05, "calib/mean_conf": 0.00031620553359683795, "calib/mu_c": 0.00028037383177570094, "calib/mu_w": 0.00034246575342465754, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0023310107032073887, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2331.0, "completions/max_terminated_length": 2331.0, "completions/mean_length": 314.87109375, "completions/mean_terminated_length": 314.87109375, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "epoch": 0.06826666666666667, "grad_norm": 0.007864679209887981, "learning_rate": 3.777777777777778e-06, "loss": 0.1226, "num_tokens": 12569212.0, "reward": 1.1973800659179688, "reward_std": 0.2078630030155182, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5705413818359375, "rewards/format_reward_step": 0.98828125, "step": 64 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4152026427909732, "aux_distill/mean_u": 0.22176144665746284, "aux_distill/n_active_tok": 322.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5045454545454545, "calib/avg_num_step_conf": 5.046875, "calib/ece": 0.4296484375, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 9.090909090909092e-05, "calib/mean_conf": 3.90625e-05, "calib/mu_c": 9.090909090909092e-05, "calib/mu_w": 0.0, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0006237781024480981, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 744.0, "completions/max_terminated_length": 744.0, "completions/mean_length": 268.62109375, "completions/mean_terminated_length": 269.6745300292969, "completions/min_length": 0.0, "completions/min_terminated_length": 96.0, "epoch": 0.06933333333333333, "grad_norm": 0.0070212222635746, "learning_rate": 3.7500000000000005e-06, "loss": 0.0323, "num_tokens": 12743003.0, "reward": 1.2148826122283936, "reward_std": 0.12614324688911438, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5703902244567871, "rewards/format_reward_step": 1.0, "step": 65 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4280928233638406, "aux_distill/mean_u": 0.23642457940125466, "aux_distill/n_active_tok": 398.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5028768699654776, "calib/avg_num_step_conf": 6.203125, "calib/ece": 0.30929411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00017836593785960876, "calib/mean_conf": 0.0005098039215686275, "calib/mu_c": 0.0006329113924050633, "calib/mu_w": 0.00045454545454545455, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002371171209071525, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1600.0, "completions/max_terminated_length": 1600.0, "completions/mean_length": 330.25390625, "completions/mean_terminated_length": 330.25390625, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.0704, "grad_norm": 0.006328082177788019, "learning_rate": 3.7222222222222225e-06, "loss": 0.0465, "num_tokens": 12933900.0, "reward": 1.150583028793335, "reward_std": 0.132345050573349, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.6878847479820251, "rewards/format_reward_step": 0.99609375, "step": 66 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42184107284992933, "aux_distill/mean_u": 0.241509055720451, "aux_distill/n_active_tok": 356.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4847692307692308, "calib/avg_num_step_conf": 5.5703125, "calib/ece": 0.4899607843137255, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0003046153846153846, "calib/mean_conf": 0.0002352941176470588, "calib/mu_c": 8e-05, "calib/mu_w": 0.0003846153846153846, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001515776320791191, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2288.0, "completions/max_terminated_length": 2288.0, "completions/mean_length": 319.81640625, "completions/mean_terminated_length": 319.81640625, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.07146666666666666, "grad_norm": 0.007012557238340378, "learning_rate": 3.694444444444445e-06, "loss": 0.0474, "num_tokens": 13120781.0, "reward": 1.2402722835540771, "reward_std": 0.14968647062778473, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5078883171081543, "rewards/format_reward_step": 0.99609375, "step": 67 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4061860046349466, "aux_distill/mean_u": 0.21555456906916828, "aux_distill/n_active_tok": 340.0, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5073344914109245, "calib/avg_num_step_conf": 5.35546875, "calib/ece": 0.3863671875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 8.556906646078617e-05, "calib/mean_conf": 0.0003515625, "calib/mu_c": 0.00040404040404040404, "calib/mu_w": 0.00031847133757961787, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002042860447655138, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 873.0, "completions/max_terminated_length": 873.0, "completions/mean_length": 284.3203125, "completions/mean_terminated_length": 285.435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.07253333333333334, "grad_norm": 0.0074111889116466045, "learning_rate": 3.6666666666666666e-06, "loss": 0.0429, "num_tokens": 13297655.0, "reward": 1.1935133934020996, "reward_std": 0.12575292587280273, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6135894656181335, "rewards/format_reward_step": 1.0, "step": 68 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42660697270184755, "aux_distill/mean_u": 0.2442016385441469, "aux_distill/n_active_tok": 357.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5029569892473118, "calib/avg_num_step_conf": 5.53515625, "calib/ece": 0.36707509881422923, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00037768817204301064, "calib/mean_conf": 0.0005138339920948617, "calib/mu_c": 0.0007526881720430106, "calib/mu_w": 0.000375, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.003101192924258187, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2326.0, "completions/max_terminated_length": 2326.0, "completions/mean_length": 350.0625, "completions/mean_terminated_length": 350.0625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.0736, "grad_norm": 0.006129054818302393, "learning_rate": 3.638888888888889e-06, "loss": 0.098, "num_tokens": 13491767.0, "reward": 1.170190453529358, "reward_std": 0.1372353434562683, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.625537097454071, "rewards/format_reward_step": 0.98828125, "step": 69 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43751452304422855, "aux_distill/mean_u": 0.2280092215886282, "aux_distill/n_active_tok": 340.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4961018711018711, "calib/avg_num_step_conf": 5.3125, "calib/ece": 0.4124603174603174, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -7.796257796257797e-05, "calib/mean_conf": 0.00023809523809523812, "calib/mu_c": 0.0001923076923076923, "calib/mu_w": 0.0002702702702702703, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001524553389864964, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2103.0, "completions/max_terminated_length": 2103.0, "completions/mean_length": 338.734375, "completions/mean_terminated_length": 338.734375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.07466666666666667, "grad_norm": 0.006687454413622618, "learning_rate": 3.6111111111111115e-06, "loss": 0.0793, "num_tokens": 13685475.0, "reward": 1.1875770092010498, "reward_std": 0.15229782462120056, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5782788991928101, "rewards/format_reward_step": 0.984375, "step": 70 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41422352846711874, "aux_distill/mean_u": 0.2371021008723161, "aux_distill/n_active_tok": 377.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5053293611386958, "calib/avg_num_step_conf": 5.8984375, "calib/ece": 0.37366141732283464, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0001065872227739159, "calib/mean_conf": 0.00035433070866141734, "calib/mu_c": 0.0004210526315789474, "calib/mu_w": 0.0003144654088050315, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0018487176191927393, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1979.0, "completions/max_terminated_length": 1979.0, "completions/mean_length": 365.3671875, "completions/mean_terminated_length": 365.3671875, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.07573333333333333, "grad_norm": 0.005943302530795336, "learning_rate": 3.5833333333333335e-06, "loss": 0.0653, "num_tokens": 13883417.0, "reward": 1.1778888702392578, "reward_std": 0.15389281511306763, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6214027404785156, "rewards/format_reward_step": 0.9921875, "step": 71 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43518350925296545, "aux_distill/mean_u": 0.2185392387426317, "aux_distill/n_active_tok": 331.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5210302457466919, "calib/avg_num_step_conf": 5.1796875, "calib/ece": 0.3633201581027668, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005279503105590061, "calib/mean_conf": 0.00031620553359683795, "calib/mu_c": 0.0006521739130434782, "calib/mu_w": 0.00012422360248447205, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.001962799844732828, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2768.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 308.15625, "completions/mean_terminated_length": 308.15625, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.0768, "grad_norm": 0.008181699551641941, "learning_rate": 3.555555555555556e-06, "loss": 0.1023, "num_tokens": 14066713.0, "reward": 1.164294958114624, "reward_std": 0.17150144279003143, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.6254648566246033, "rewards/format_reward_step": 0.984375, "step": 72 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41626708675175905, "aux_distill/mean_u": 0.194263666773112, "aux_distill/n_active_tok": 314.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49814814814814823, "calib/avg_num_step_conf": 4.9140625, "calib/ece": 0.47023529411764703, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -3.7037037037037084e-05, "calib/mean_conf": 0.0003529411764705882, "calib/mu_c": 0.0003333333333333333, "calib/mu_w": 0.0003703703703703704, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0018452220166303669, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2391.0, "completions/max_terminated_length": 2391.0, "completions/mean_length": 315.80859375, "completions/mean_terminated_length": 315.80859375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.07786666666666667, "grad_norm": 0.006817559711635113, "learning_rate": 3.5277777777777784e-06, "loss": 0.0434, "num_tokens": 14254592.0, "reward": 1.2306232452392578, "reward_std": 0.15328508615493774, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5276527404785156, "rewards/format_reward_step": 0.99609375, "step": 73 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4087336999364197, "aux_distill/mean_u": 0.17648553013055804, "aux_distill/n_active_tok": 318.25, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5107051282051283, "calib/avg_num_step_conf": 5.0234375, "calib/ece": 0.40909448818897637, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00021410256410256412, "calib/mean_conf": 0.0003543307086614173, "calib/mu_c": 0.0004807692307692308, "calib/mu_w": 0.0002666666666666667, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0018487176191927398, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2514.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 302.38671875, "completions/mean_terminated_length": 303.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.07893333333333333, "grad_norm": 0.008606214076280594, "learning_rate": 3.5e-06, "loss": 0.0454, "num_tokens": 14435931.0, "reward": 1.1955060958862305, "reward_std": 0.1825999915599823, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5863245725631714, "rewards/format_reward_step": 0.9921875, "step": 74 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37989877723157406, "aux_distill/mean_u": 0.2001054968370109, "aux_distill/n_active_tok": 312.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5027925531914894, "calib/avg_num_step_conf": 4.88671875, "calib/ece": 0.6295669291338583, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 5.585106382978726e-05, "calib/mean_conf": 0.0003543307086614173, "calib/mu_c": 0.000375, "calib/mu_w": 0.00031914893617021275, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0018487176191927398, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2718.0, "completions/max_terminated_length": 2718.0, "completions/mean_length": 307.0546875, "completions/mean_terminated_length": 307.0546875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.08, "grad_norm": 0.008363153785467148, "learning_rate": 3.4722222222222224e-06, "loss": 0.0629, "num_tokens": 14619289.0, "reward": 1.3088264465332031, "reward_std": 0.15875719487667084, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.3676527142524719, "rewards/format_reward_step": 0.9921875, "step": 75 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4291513739153743, "aux_distill/mean_u": 0.23722198629035704, "aux_distill/n_active_tok": 308.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49385964912280694, "calib/avg_num_step_conf": 4.82421875, "calib/ece": 0.47391304347826085, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 4.072681704260652e-05, "calib/mean_conf": 0.0003952569169960474, "calib/mu_c": 0.0004166666666666667, "calib/mu_w": 0.00037593984962406017, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0024835222248861676, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2518.0, "completions/max_terminated_length": 2518.0, "completions/mean_length": 312.01171875, "completions/mean_terminated_length": 312.01171875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.08106666666666666, "grad_norm": 0.008254244923591614, "learning_rate": 3.444444444444445e-06, "loss": 0.0841, "num_tokens": 14802220.0, "reward": 1.2228484153747559, "reward_std": 0.1601463109254837, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5199155807495117, "rewards/format_reward_step": 0.98828125, "step": 76 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4120447961613536, "aux_distill/mean_u": 0.22283618500480631, "aux_distill/n_active_tok": 331.25, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4801587301587301, "calib/avg_num_step_conf": 5.1796875, "calib/ece": 0.49556, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005529953917050691, "calib/mean_conf": 0.00044000000000000007, "calib/mu_c": 0.00016129032258064516, "calib/mu_w": 0.0007142857142857143, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0024096472770926453, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2523.0, "completions/max_terminated_length": 2523.0, "completions/mean_length": 319.32421875, "completions/mean_terminated_length": 319.32421875, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.08213333333333334, "grad_norm": 0.009399495087563992, "learning_rate": 3.416666666666667e-06, "loss": 0.1611, "num_tokens": 14988631.0, "reward": 1.2227314710617065, "reward_std": 0.20367680490016937, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.4923378825187683, "rewards/format_reward_step": 0.9765625, "step": 77 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4121271399781108, "aux_distill/mean_u": 0.21187937211055408, "aux_distill/n_active_tok": 332.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48820649755229195, "calib/avg_num_step_conf": 5.19921875, "calib/ece": 0.4206299212598425, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00028100960010172296, "calib/mean_conf": 0.0006299212598425197, "calib/mu_c": 0.00046728971962616824, "calib/mu_w": 0.0007482993197278912, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0030086571928413857, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2161.0, "completions/max_terminated_length": 2161.0, "completions/mean_length": 346.95703125, "completions/mean_terminated_length": 346.95703125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.0832, "grad_norm": 0.007775504142045975, "learning_rate": 3.3888888888888893e-06, "loss": 0.0838, "num_tokens": 15185476.0, "reward": 1.2013624906539917, "reward_std": 0.14862890541553497, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.5745999813079834, "rewards/format_reward_step": 0.9921875, "step": 78 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40545395016670227, "aux_distill/mean_u": 0.21084205152926944, "aux_distill/n_active_tok": 333.75, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.49871700879765396, "calib/avg_num_step_conf": 5.26953125, "calib/ece": 0.5151171875, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 4.643206256109482e-05, "calib/mean_conf": 0.0005078125, "calib/mu_c": 0.0005303030303030304, "calib/mu_w": 0.00048387096774193554, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002366749134328298, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 760.0, "completions/max_terminated_length": 760.0, "completions/mean_length": 340.0703125, "completions/mean_terminated_length": 341.4039306640625, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.08426666666666667, "grad_norm": 0.00761050172150135, "learning_rate": 3.3611111111111117e-06, "loss": 0.0376, "num_tokens": 15378910.0, "reward": 1.2580829858779907, "reward_std": 0.12420278042554855, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.4849160313606262, "rewards/format_reward_step": 1.0, "step": 79 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41106561524793506, "aux_distill/mean_u": 0.21736490320900576, "aux_distill/n_active_tok": 301.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5141188828472566, "calib/avg_num_step_conf": 4.69140625, "calib/ece": 0.46619607843137256, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00037815126050420167, "calib/mean_conf": 0.00047058823529411766, "calib/mu_c": 0.0006722689075630252, "calib/mu_w": 0.00029411764705882356, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0027607397500859676, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1661.0, "completions/max_terminated_length": 1661.0, "completions/mean_length": 289.74609375, "completions/mean_terminated_length": 289.74609375, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.08533333333333333, "grad_norm": 0.009661182761192322, "learning_rate": 3.3333333333333333e-06, "loss": 0.0838, "num_tokens": 15555245.0, "reward": 1.228824257850647, "reward_std": 0.15840938687324524, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5318671464920044, "rewards/format_reward_step": 0.99609375, "step": 80 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41248337598517537, "aux_distill/mean_u": 0.23188753956743624, "aux_distill/n_active_tok": 289.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5129652605459057, "calib/avg_num_step_conf": 4.5859375, "calib/ece": 0.48771653543307086, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00033746898263027294, "calib/mean_conf": 0.00047244094488188977, "calib/mu_c": 0.0006451612903225806, "calib/mu_w": 0.0003076923076923077, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.002299697931733895, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1484.0, "completions/max_terminated_length": 1484.0, "completions/mean_length": 317.109375, "completions/mean_terminated_length": 318.35296630859375, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.0864, "grad_norm": 0.009779230691492558, "learning_rate": 3.3055555555555558e-06, "loss": 0.053, "num_tokens": 15742673.0, "reward": 1.234684705734253, "reward_std": 0.16173626482486725, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5084320306777954, "rewards/format_reward_step": 0.9921875, "step": 81 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.405979058239609, "aux_distill/mean_u": 0.1937273843168663, "aux_distill/n_active_tok": 243.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49200253887654716, "calib/avg_num_step_conf": 3.80859375, "calib/ece": 0.5431746031746031, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00024373214852427803, "calib/mean_conf": 0.0004761904761904762, "calib/mu_c": 0.00036496350364963507, "calib/mu_w": 0.0006086956521739131, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0023084189797220617, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2440.0, "completions/max_terminated_length": 2440.0, "completions/mean_length": 279.09765625, "completions/mean_terminated_length": 282.4071350097656, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.08746666666666666, "grad_norm": 0.0082706892862916, "learning_rate": 3.277777777777778e-06, "loss": 0.033, "num_tokens": 15919674.0, "reward": 1.248239517211914, "reward_std": 0.1699363887310028, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.4456976652145386, "rewards/format_reward_step": 0.98046875, "step": 82 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3855320755392313, "aux_distill/mean_u": 0.20919488492473376, "aux_distill/n_active_tok": 240.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48275862068965514, "calib/avg_num_step_conf": 3.7578125, "calib/ece": 0.5407905138339921, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00027812735967782537, "calib/mean_conf": 0.0007114624505928853, "calib/mu_c": 0.0005839416058394161, "calib/mu_w": 0.0008620689655172415, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.0027201028512387755, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 876.0, "completions/max_terminated_length": 876.0, "completions/mean_length": 332.66796875, "completions/mean_terminated_length": 336.6126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.08853333333333334, "grad_norm": 0.007690061815083027, "learning_rate": 3.2500000000000002e-06, "loss": 0.0329, "num_tokens": 16112101.0, "reward": 1.2483553886413574, "reward_std": 0.13580173254013062, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.4498359262943268, "rewards/format_reward_step": 0.9765625, "step": 83 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4101240658201277, "aux_distill/mean_u": 0.20711848674623146, "aux_distill/n_active_tok": 207.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5073301991834716, "calib/avg_num_step_conf": 3.24609375, "calib/ece": 0.4622745098039216, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00022825683533341575, "calib/mean_conf": 0.00047058823529411766, "calib/mu_c": 0.0005932203389830508, "calib/mu_w": 0.00036496350364963507, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0022953742159469645, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2607.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 287.5390625, "completions/mean_terminated_length": 287.5390625, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.0896, "grad_norm": 0.010937503539025784, "learning_rate": 3.2222222222222227e-06, "loss": 0.028, "num_tokens": 16291631.0, "reward": 1.2268332242965698, "reward_std": 0.12932856380939484, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5356976389884949, "rewards/format_reward_step": 0.99609375, "step": 84 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3972956840880215, "aux_distill/mean_u": 0.2200048133976546, "aux_distill/n_active_tok": 222.0, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5182291666666666, "calib/avg_num_step_conf": 3.46875, "calib/ece": 0.4829435483870968, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00014062500000000002, "calib/mean_conf": 0.0009274193548387097, "calib/mu_c": 0.001, "calib/mu_w": 0.000859375, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.004703590576146321, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2342.0, "completions/max_terminated_length": 2342.0, "completions/mean_length": 325.8125, "completions/mean_terminated_length": 327.0902099609375, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.09066666666666667, "grad_norm": 0.009536146186292171, "learning_rate": 3.1944444444444443e-06, "loss": 0.1186, "num_tokens": 16482863.0, "reward": 1.1918638944625854, "reward_std": 0.2307528555393219, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.48919644951820374, "rewards/format_reward_step": 0.95703125, "step": 85 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.396264610812068, "aux_distill/mean_u": 0.1669753048582908, "aux_distill/n_active_tok": 150.78125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5228428050052137, "calib/avg_num_step_conf": 2.359375, "calib/ece": 0.44710843373493975, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004846193952033369, "calib/mean_conf": 0.0026907630522088354, "calib/mu_c": 0.005357142857142858, "calib/mu_w": 0.000510948905109489, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.0, "calib/std_conf": 0.03170504050452634, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1915.0, "completions/max_terminated_length": 1915.0, "completions/mean_length": 301.15625, "completions/mean_terminated_length": 304.727294921875, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.09173333333333333, "grad_norm": 0.009685726836323738, "learning_rate": 3.1666666666666667e-06, "loss": 0.0231, "num_tokens": 16665471.0, "reward": 1.1756794452667236, "reward_std": 0.2164359986782074, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5232340097427368, "rewards/format_reward_step": 0.953125, "step": 86 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3704542489722371, "aux_distill/mean_u": 0.1592296599675276, "aux_distill/n_active_tok": 132.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5052309782608695, "calib/avg_num_step_conf": 2.06640625, "calib/ece": 0.634047619047619, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 5.434782608695963e-06, "calib/mean_conf": 0.0008730158730158729, "calib/mu_c": 0.0008750000000000001, "calib/mu_w": 0.0008695652173913042, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.0, "calib/std_conf": 0.0029600089204716695, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 306.0625, "completions/mean_terminated_length": 307.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 62.0, "epoch": 0.0928, "grad_norm": 0.01069539226591587, "learning_rate": 3.138888888888889e-06, "loss": 0.1338, "num_tokens": 16849319.0, "reward": 1.283745288848877, "reward_std": 0.1876409649848938, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.3487406373023987, "rewards/format_reward_step": 0.96875, "step": 87 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.396932493429631, "aux_distill/mean_u": 0.1686609234334076, "aux_distill/n_active_tok": 154.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5094345760947361, "calib/avg_num_step_conf": 2.4140625, "calib/ece": 0.563, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00014639859457349228, "calib/mean_conf": 0.001, "calib/mu_c": 0.0010638297872340428, "calib/mu_w": 0.0009174311926605505, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.0, "calib/std_conf": 0.0036055512754639895, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 377.2734375, "completions/mean_terminated_length": 378.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.09386666666666667, "grad_norm": 0.009100557304918766, "learning_rate": 3.1111111111111116e-06, "loss": 0.1337, "num_tokens": 17055749.0, "reward": 1.2193293571472168, "reward_std": 0.21440431475639343, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.39959609508514404, "rewards/format_reward_step": 0.9375, "step": 88 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.397604210767895, "aux_distill/mean_u": 0.1975274313688232, "aux_distill/n_active_tok": 137.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4970833333333333, "calib/avg_num_step_conf": 2.14453125, "calib/ece": 0.5191600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00014743589743589753, "calib/mean_conf": 0.00084, "calib/mu_c": 0.0007692307692307692, "calib/mu_w": 0.0009166666666666668, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.0, "calib/std_conf": 0.0033006665993401997, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2465.0, "completions/max_terminated_length": 2465.0, "completions/mean_length": 350.140625, "completions/mean_terminated_length": 351.51373291015625, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.09493333333333333, "grad_norm": 0.010221146047115326, "learning_rate": 3.0833333333333336e-06, "loss": 0.1189, "num_tokens": 17254273.0, "reward": 1.1858930587768555, "reward_std": 0.2175370752811432, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.43428629636764526, "rewards/format_reward_step": 0.921875, "step": 89 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39298654487356544, "aux_distill/mean_u": 0.22362391264261372, "aux_distill/n_active_tok": 144.53125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5160235448005231, "calib/avg_num_step_conf": 2.2578125, "calib/ece": 0.5571084337349397, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0005487246566383257, "calib/mean_conf": 0.0011244979919678717, "calib/mu_c": 0.001366906474820144, "calib/mu_w": 0.0008181818181818182, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.0, "calib/std_conf": 0.0046071101685783095, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 334.0234375, "completions/mean_terminated_length": 334.0234375, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.096, "grad_norm": 0.011843850836157799, "learning_rate": 3.055555555555556e-06, "loss": 0.1311, "num_tokens": 17443103.0, "reward": 1.2019033432006836, "reward_std": 0.25413912534713745, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.3920878767967224, "rewards/format_reward_step": 0.92578125, "step": 90 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3809904200024903, "aux_distill/mean_u": 0.13741306957077365, "aux_distill/n_active_tok": 114.8125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.46223776223776225, "calib/avg_num_step_conf": 1.79296875, "calib/ece": 0.5761290322580646, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00244955044955045, "calib/mean_conf": 0.0020161290322580645, "calib/mu_c": 0.000979020979020979, "calib/mu_w": 0.003428571428571429, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.0007661290322580645, "calib/std_conf": 0.012603112692177985, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2177.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 320.0546875, "completions/mean_terminated_length": 321.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.09706666666666666, "grad_norm": 0.008399066515266895, "learning_rate": 3.0277777777777776e-06, "loss": 0.1069, "num_tokens": 17632749.0, "reward": 1.2387499809265137, "reward_std": 0.18196678161621094, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.40328124165534973, "rewards/format_reward_step": 0.95703125, "step": 91 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.41714442893862724, "aux_distill/mean_u": 0.15397619196037046, "aux_distill/n_active_tok": 116.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47993827160493824, "calib/avg_num_step_conf": 1.82421875, "calib/ece": 0.5702380952380952, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0005092592592592591, "calib/mean_conf": 0.0011904761904761906, "calib/mu_c": 0.0009722222222222223, "calib/mu_w": 0.0014814814814814814, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.0, "calib/std_conf": 0.0035872664698379433, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1886.0, "completions/max_terminated_length": 1886.0, "completions/mean_length": 301.79296875, "completions/mean_terminated_length": 304.1692810058594, "completions/min_length": 0.0, "completions/min_terminated_length": 113.0, "epoch": 0.09813333333333334, "grad_norm": 0.010597274638712406, "learning_rate": 3e-06, "loss": 0.0414, "num_tokens": 17816728.0, "reward": 1.246633529663086, "reward_std": 0.19921471178531647, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.4112359583377838, "rewards/format_reward_step": 0.95703125, "step": 92 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3879448645748198, "aux_distill/mean_u": 0.15627032364352322, "aux_distill/n_active_tok": 140.25, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4699326898653797, "calib/avg_num_step_conf": 2.19140625, "calib/ece": 0.4923107569721116, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000835661671323343, "calib/mean_conf": 0.0017131474103585659, "calib/mu_c": 0.0012903225806451613, "calib/mu_w": 0.0021259842519685043, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.0, "calib/std_conf": 0.005190915529849348, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2323.0, "completions/max_terminated_length": 2323.0, "completions/mean_length": 369.08984375, "completions/mean_terminated_length": 369.08984375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.0992, "grad_norm": 0.008991632610559464, "learning_rate": 2.9722222222222225e-06, "loss": 0.131, "num_tokens": 18016991.0, "reward": 1.1959228515625, "reward_std": 0.21242350339889526, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.4816894233226776, "rewards/format_reward_step": 0.94140625, "step": 93 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3044614642858505, "aux_distill/mean_u": 0.13583506846250099, "aux_distill/n_active_tok": 105.75, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4786786786786786, "calib/avg_num_step_conf": 1.65234375, "calib/ece": 0.5476016260162602, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004784784784784787, "calib/mean_conf": 0.0011788617886178863, "calib/mu_c": 0.000962962962962963, "calib/mu_w": 0.0014414414414414417, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.0, "calib/std_conf": 0.004111321905441839, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2710.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 324.40234375, "completions/mean_terminated_length": 325.6745300292969, "completions/min_length": 0.0, "completions/min_terminated_length": 46.0, "epoch": 0.10026666666666667, "grad_norm": 0.010803716257214546, "learning_rate": 2.944444444444445e-06, "loss": 0.1487, "num_tokens": 18208718.0, "reward": 1.1997181177139282, "reward_std": 0.194935142993927, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.4111550450325012, "rewards/format_reward_step": 0.93359375, "step": 94 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40034619299694896, "aux_distill/mean_u": 0.21922548809225598, "aux_distill/n_active_tok": 99.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5176317501626545, "calib/avg_num_step_conf": 1.5625, "calib/ece": 0.5754183266932271, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001318152244632401, "calib/mean_conf": 0.002270916334661355, "calib/mu_c": 0.002827586206896552, "calib/mu_w": 0.0015094339622641509, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.0, "calib/std_conf": 0.007469880474262826, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2507.0, "completions/max_terminated_length": 2507.0, "completions/mean_length": 325.10546875, "completions/mean_terminated_length": 325.10546875, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.10133333333333333, "grad_norm": 0.009412271901965141, "learning_rate": 2.916666666666667e-06, "loss": 0.1876, "num_tokens": 18398073.0, "reward": 1.2457122802734375, "reward_std": 0.19631265103816986, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.4015808701515198, "rewards/format_reward_step": 0.95703125, "step": 95 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3630610196851194, "aux_distill/mean_u": 0.14667928688553772, "aux_distill/n_active_tok": 90.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4754359925788498, "calib/avg_num_step_conf": 1.40625, "calib/ece": 0.6922222222222222, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0014753246753246756, "calib/mean_conf": 0.0022222222222222222, "calib/mu_c": 0.0017714285714285714, "calib/mu_w": 0.003246753246753247, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.0, "calib/std_conf": 0.007806070987896382, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2452.0, "completions/max_terminated_length": 2452.0, "completions/mean_length": 289.3046875, "completions/mean_terminated_length": 289.3046875, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.1024, "grad_norm": 0.008919981308281422, "learning_rate": 2.888888888888889e-06, "loss": 0.1522, "num_tokens": 18577951.0, "reward": 1.309772253036499, "reward_std": 0.1510075330734253, "rewards/accuracy_reward_step": 0.68359375, "rewards/final_brier_reward_step": 0.29141953587532043, "rewards/format_reward_step": 0.9609375, "step": 96 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3719734316691756, "aux_distill/mean_u": 0.1676030758858746, "aux_distill/n_active_tok": 99.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.49187392267914315, "calib/avg_num_step_conf": 1.55859375, "calib/ece": 0.48415686274509806, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0001976114257572028, "calib/mean_conf": 0.0021176470588235297, "calib/mu_c": 0.0020161290322580645, "calib/mu_w": 0.0022137404580152673, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.0, "calib/std_conf": 0.005758700353186079, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1841.0, "completions/max_terminated_length": 1841.0, "completions/mean_length": 268.06640625, "completions/mean_terminated_length": 268.06640625, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.10346666666666667, "grad_norm": 0.012327205389738083, "learning_rate": 2.861111111111111e-06, "loss": 0.0713, "num_tokens": 18751648.0, "reward": 1.2157626152038574, "reward_std": 0.19388362765312195, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.4940253496170044, "rewards/format_reward_step": 0.96875, "step": 97 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3803948569111526, "aux_distill/mean_u": 0.18490822814105912, "aux_distill/n_active_tok": 78.53125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4868605689501211, "calib/avg_num_step_conf": 1.2265625, "calib/ece": 0.5311155378486055, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.00796812749003984, "calib/gap": -0.0024122974869243523, "calib/mean_conf": 0.010079681274900398, "calib/mu_c": 0.008955223880597015, "calib/mu_w": 0.011367521367521368, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.003665338645418326, "calib/std_conf": 0.08539102480372213, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1891.0, "completions/max_terminated_length": 1891.0, "completions/mean_length": 296.8359375, "completions/mean_terminated_length": 296.8359375, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "epoch": 0.10453333333333334, "grad_norm": 0.010739736258983612, "learning_rate": 2.8333333333333335e-06, "loss": 0.1146, "num_tokens": 18933822.0, "reward": 1.2334849834442139, "reward_std": 0.20740289986133575, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.45134490728378296, "rewards/format_reward_step": 0.96875, "step": 98 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36638830602169037, "aux_distill/mean_u": 0.11033139603598505, "aux_distill/n_active_tok": 78.0, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5610946745562129, "calib/avg_num_step_conf": 1.21875, "calib/ece": 0.3180321285140562, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004231508875739645, "calib/mean_conf": 0.0032530120481927714, "calib/mu_c": 0.006125, "calib/mu_w": 0.0018934911242603554, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.0, "calib/std_conf": 0.013660405105061599, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2102.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 352.171875, "completions/mean_terminated_length": 353.5529479980469, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.1056, "grad_norm": 0.007696256972849369, "learning_rate": 2.805555555555556e-06, "loss": 0.1199, "num_tokens": 19129778.0, "reward": 1.111154556274414, "reward_std": 0.21740788221359253, "rewards/accuracy_reward_step": 0.3125, "rewards/final_brier_reward_step": 0.6480902433395386, "rewards/format_reward_step": 0.94921875, "step": 99 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3693304159678519, "aux_distill/mean_u": 0.19013945568804616, "aux_distill/n_active_tok": 78.75, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5161803713527852, "calib/avg_num_step_conf": 1.23046875, "calib/ece": 0.5249999999999999, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0018408488063660472, "calib/mean_conf": 0.004593495934959349, "calib/mu_c": 0.005461538461538461, "calib/mu_w": 0.003620689655172414, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.0005691056910569107, "calib/std_conf": 0.01820271461858796, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2178.0, "completions/max_terminated_length": 2178.0, "completions/mean_length": 333.40625, "completions/mean_terminated_length": 334.7137451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 63.0, "epoch": 0.10666666666666667, "grad_norm": 0.008256965316832066, "learning_rate": 2.7777777777777783e-06, "loss": 0.153, "num_tokens": 19322538.0, "reward": 1.1979174613952637, "reward_std": 0.2293814718723297, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.4388035237789154, "rewards/format_reward_step": 0.94140625, "step": 100 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3319393121637404, "aux_distill/mean_u": 0.09735452389920093, "aux_distill/n_active_tok": 72.78125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5732998506854894, "calib/avg_num_step_conf": 1.13671875, "calib/ece": 0.42551020408163265, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.004081632653061225, "calib/gap": 0.010085516492466405, "calib/mean_conf": 0.009183673469387756, "calib/mu_c": 0.014905660377358491, "calib/mu_w": 0.004820143884892086, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.9453125, "calib/pce": 0.0010204081632653062, "calib/std_conf": 0.06635208299004534, "calib/step_conf_rate": 0.9453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2982.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 355.12890625, "completions/mean_terminated_length": 357.9252014160156, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.10773333333333333, "grad_norm": 0.00929940678179264, "learning_rate": 2.7500000000000004e-06, "loss": 0.1571, "num_tokens": 19520443.0, "reward": 1.1560661792755127, "reward_std": 0.2282753884792328, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5426011681556702, "rewards/format_reward_step": 0.94140625, "step": 101 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34266128670424223, "aux_distill/mean_u": 0.12935753004547906, "aux_distill/n_active_tok": 78.75, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5238063660477453, "calib/avg_num_step_conf": 1.23046875, "calib/ece": 0.5739759036144578, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.008730769230769231, "calib/mean_conf": 0.008353413654618475, "calib/mu_c": 0.012, "calib/mu_w": 0.0032692307692307695, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.0, "calib/std_conf": 0.0639347339025536, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1889.0, "completions/max_terminated_length": 1889.0, "completions/mean_length": 276.265625, "completions/mean_terminated_length": 277.3490295410156, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.1088, "grad_norm": 0.008674833923578262, "learning_rate": 2.7222222222222224e-06, "loss": 0.0695, "num_tokens": 19697863.0, "reward": 1.2467310428619385, "reward_std": 0.18275701999664307, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.40752461552619934, "rewards/format_reward_step": 0.9453125, "step": 102 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.43721392983570695, "aux_distill/mean_u": 0.1723636638590128, "aux_distill/n_active_tok": 73.03125, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.493941701862494, "calib/avg_num_step_conf": 1.140625, "calib/ece": 0.5790163934426229, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.012295081967213115, "calib/gap": 0.001057951949041059, "calib/mean_conf": 0.016065573770491802, "calib/mu_c": 0.016503496503496504, "calib/mu_w": 0.015445544554455445, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.953125, "calib/pce": 0.004508196721311476, "calib/std_conf": 0.11026463976292598, "calib/step_conf_rate": 0.953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 396.515625, "completions/mean_terminated_length": 398.07061767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.10986666666666667, "grad_norm": 0.009121976792812347, "learning_rate": 2.6944444444444444e-06, "loss": 0.214, "num_tokens": 19903923.0, "reward": 1.2337702512741089, "reward_std": 0.2346125990152359, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.4011343717575073, "rewards/format_reward_step": 0.94921875, "step": 103 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3781659468077123, "aux_distill/mean_u": 0.16131986090351855, "aux_distill/n_active_tok": 85.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.478180307967542, "calib/avg_num_step_conf": 1.33984375, "calib/ece": 0.43587301587301586, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.0010964155645006683, "calib/mean_conf": 0.01253968253968254, "calib/mu_c": 0.013153153153153152, "calib/mu_w": 0.012056737588652484, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.003968253968253968, "calib/std_conf": 0.08897954560688179, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1861.0, "completions/max_terminated_length": 1861.0, "completions/mean_length": 317.05859375, "completions/mean_terminated_length": 317.05859375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.11093333333333333, "grad_norm": 0.00879514031112194, "learning_rate": 2.666666666666667e-06, "loss": 0.1337, "num_tokens": 20091770.0, "reward": 1.1931354999542236, "reward_std": 0.19186003506183624, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5464273691177368, "rewards/format_reward_step": 0.97265625, "step": 104 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3944840473122895, "aux_distill/mean_u": 0.1414717727707358, "aux_distill/n_active_tok": 80.5, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.49540495867768597, "calib/avg_num_step_conf": 1.26171875, "calib/ece": 0.5008130081300813, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.012195121951219513, "calib/gap": -0.02308099173553719, "calib/mean_conf": 0.015447154471544714, "calib/mu_c": 0.00371900826446281, "calib/mu_w": 0.026799999999999997, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.012195121951219513, "calib/std_conf": 0.10962057275702294, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2820.0, "completions/max_terminated_length": 2820.0, "completions/mean_length": 367.71875, "completions/mean_terminated_length": 370.6141662597656, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.112, "grad_norm": 0.00771205173805356, "learning_rate": 2.6388888888888893e-06, "loss": 0.1899, "num_tokens": 20291666.0, "reward": 1.1714603900909424, "reward_std": 0.302039235830307, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.46010822057724, "rewards/format_reward_step": 0.9375, "step": 105 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4089339836500585, "aux_distill/mean_u": 0.1628464583554884, "aux_distill/n_active_tok": 73.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.48087277736038064, "calib/avg_num_step_conf": 1.15234375, "calib/ece": 0.5207166007905139, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.011628374655647382, "calib/mean_conf": 0.011773517786561265, "calib/mu_c": 0.006212121212121212, "calib/mu_w": 0.017840495867768594, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.005375494071146245, "calib/std_conf": 0.06562255566896466, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2436.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 318.90234375, "completions/mean_terminated_length": 320.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.11306666666666666, "grad_norm": 0.009178507141768932, "learning_rate": 2.6111111111111113e-06, "loss": 0.0854, "num_tokens": 20477889.0, "reward": 1.230987548828125, "reward_std": 0.17167076468467712, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.46197509765625, "rewards/format_reward_step": 0.96875, "step": 106 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3847769391722977, "aux_distill/mean_u": 0.1593563057381601, "aux_distill/n_active_tok": 77.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.42153976975405544, "calib/avg_num_step_conf": 1.21484375, "calib/ece": 0.5882629482071713, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.01593625498007968, "calib/gap": -0.026485609628466772, "calib/mean_conf": 0.022334661354581672, "calib/mu_c": 0.011360544217687077, "calib/mu_w": 0.03784615384615385, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.01247011952191235, "calib/std_conf": 0.12520179549592428, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2587.0, "completions/max_terminated_length": 2587.0, "completions/mean_length": 315.31640625, "completions/mean_terminated_length": 316.5529479980469, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.11413333333333334, "grad_norm": 0.0072776032611727715, "learning_rate": 2.5833333333333337e-06, "loss": 0.0908, "num_tokens": 20663226.0, "reward": 1.2622661590576172, "reward_std": 0.20053136348724365, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.3995321989059448, "rewards/format_reward_step": 0.9765625, "step": 107 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36748338444158435, "aux_distill/mean_u": 0.16356597389892147, "aux_distill/n_active_tok": 74.03125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4810773074661963, "calib/avg_num_step_conf": 1.15625, "calib/ece": 0.6667469879518073, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.00010802469135802462, "calib/mean_conf": 0.007951807228915662, "calib/mu_c": 0.007916666666666667, "calib/mu_w": 0.008024691358024692, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9765625, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.0, "calib/std_conf": 0.018762457354271893, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2877.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 357.296875, "completions/mean_terminated_length": 358.69805908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.1152, "grad_norm": 0.009626587852835655, "learning_rate": 2.5555555555555557e-06, "loss": 0.0949, "num_tokens": 20857926.0, "reward": 1.3018684387207031, "reward_std": 0.18892589211463928, "rewards/accuracy_reward_step": 0.65625, "rewards/final_brier_reward_step": 0.3224866986274719, "rewards/format_reward_step": 0.96875, "step": 108 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.343574243132025, "aux_distill/mean_u": 0.10433978655256294, "aux_distill/n_active_tok": 77.28125, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4994190814652815, "calib/avg_num_step_conf": 1.20703125, "calib/ece": 0.4774793388429752, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008027610716238382, "calib/mean_conf": 0.010123966942148762, "calib/mu_c": 0.014237288135593221, "calib/mu_w": 0.006209677419354838, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.9609375, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.0, "calib/std_conf": 0.05285627952372183, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2876.0, "completions/max_terminated_length": 2876.0, "completions/mean_length": 404.6328125, "completions/mean_terminated_length": 409.43084716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.11626666666666667, "grad_norm": 0.008727338165044785, "learning_rate": 2.5277777777777778e-06, "loss": 0.1671, "num_tokens": 21066112.0, "reward": 1.1536353826522827, "reward_std": 0.2425643503665924, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.47133320569992065, "rewards/format_reward_step": 0.9140625, "step": 109 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36572849517688155, "aux_distill/mean_u": 0.1205168877068942, "aux_distill/n_active_tok": 67.28125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4670535011801731, "calib/avg_num_step_conf": 1.05078125, "calib/ece": 0.5018623481781377, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.004048582995951417, "calib/gap": -0.014848544453186466, "calib/mean_conf": 0.013765182186234818, "calib/mu_c": 0.006370967741935484, "calib/mu_w": 0.02121951219512195, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.95703125, "calib/pce": 0.006801619433198381, "calib/std_conf": 0.07747530311904471, "calib/step_conf_rate": 0.95703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2540.0, "completions/max_terminated_length": 2540.0, "completions/mean_length": 326.76171875, "completions/mean_terminated_length": 326.76171875, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.11733333333333333, "grad_norm": 0.008508248254656792, "learning_rate": 2.5e-06, "loss": 0.1231, "num_tokens": 21254683.0, "reward": 1.1972490549087524, "reward_std": 0.22007989883422852, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.4726230502128601, "rewards/format_reward_step": 0.953125, "step": 110 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3720876621082425, "aux_distill/mean_u": 0.15341549175611296, "aux_distill/n_active_tok": 68.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5270833333333333, "calib/avg_num_step_conf": 1.06640625, "calib/ece": 0.5183730158730159, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.002204545454545454, "calib/mean_conf": 0.008928571428571428, "calib/mu_c": 0.00787878787878788, "calib/mu_w": 0.010083333333333333, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.001746031746031746, "calib/std_conf": 0.030329585998626372, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1772.0, "completions/max_terminated_length": 1772.0, "completions/mean_length": 327.69921875, "completions/mean_terminated_length": 327.69921875, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.1184, "grad_norm": 0.008607271127402782, "learning_rate": 2.4722222222222226e-06, "loss": 0.0551, "num_tokens": 21445982.0, "reward": 1.239898681640625, "reward_std": 0.1710304617881775, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.47198477387428284, "rewards/format_reward_step": 0.9765625, "step": 111 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38653289526700974, "aux_distill/mean_u": 0.183389772523197, "aux_distill/n_active_tok": 62.84375, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.5073285398230089, "calib/avg_num_step_conf": 0.98828125, "calib/ece": 0.5232780082987551, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0004362555309734501, "calib/mean_conf": 0.007842323651452283, "calib/mu_c": 0.008046874999999998, "calib/mu_w": 0.007610619469026548, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.953125, "calib/nonempty_step_conf_rate": 0.91796875, "calib/pce": 0.0, "calib/std_conf": 0.013827330009136139, "calib/step_conf_rate": 0.91796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 2946.0, "completions/max_terminated_length": 2946.0, "completions/mean_length": 389.703125, "completions/mean_terminated_length": 400.65863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.11946666666666667, "grad_norm": 0.008142063394188881, "learning_rate": 2.4444444444444447e-06, "loss": 0.1322, "num_tokens": 21653666.0, "reward": 1.1659774780273438, "reward_std": 0.24817338585853577, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.42570507526397705, "rewards/format_reward_step": 0.90625, "step": 112 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32456047693267465, "aux_distill/mean_u": 0.10668147590575155, "aux_distill/n_active_tok": 64.78125, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.5366289458010721, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.6068907563025211, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.001116736152471709, "calib/mean_conf": 0.006554621848739496, "calib/mu_c": 0.006986301369863014, "calib/mu_w": 0.005869565217391305, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.9375, "calib/nonempty_step_conf_rate": 0.9296875, "calib/pce": 0.0, "calib/std_conf": 0.011147368441759219, "calib/step_conf_rate": 0.9296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2135.0, "completions/max_terminated_length": 2135.0, "completions/mean_length": 338.66015625, "completions/mean_terminated_length": 344.0357360839844, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.12053333333333334, "grad_norm": 0.007761706598103046, "learning_rate": 2.4166666666666667e-06, "loss": 0.1085, "num_tokens": 21845563.0, "reward": 1.2109379768371582, "reward_std": 0.21525073051452637, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.3593761622905731, "rewards/format_reward_step": 0.921875, "step": 113 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3718952308408916, "aux_distill/mean_u": 0.16472218902211183, "aux_distill/n_active_tok": 69.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5613652868554829, "calib/avg_num_step_conf": 1.08984375, "calib/ece": 0.5928968253968254, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.011984551396316104, "calib/mean_conf": 0.014246031746031748, "calib/mu_c": 0.018954248366013074, "calib/mu_w": 0.00696969696969697, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0, "calib/std_conf": 0.06800317295444387, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1325.0, "completions/max_terminated_length": 1325.0, "completions/mean_length": 291.2421875, "completions/mean_terminated_length": 292.38433837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 100.0, "epoch": 0.1216, "grad_norm": 0.010775484144687653, "learning_rate": 2.388888888888889e-06, "loss": 0.097, "num_tokens": 22025145.0, "reward": 1.286218523979187, "reward_std": 0.17462275922298431, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.4005621075630188, "rewards/format_reward_step": 0.9765625, "step": 114 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3595115006901324, "aux_distill/mean_u": 0.12506166863110565, "aux_distill/n_active_tok": 67.78125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4552986391129032, "calib/avg_num_step_conf": 1.0625, "calib/ece": 0.48904761904761906, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.009470766129032263, "calib/mean_conf": 0.011746031746031746, "calib/mu_c": 0.0069354838709677425, "calib/mu_w": 0.016406250000000004, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.004365079365079365, "calib/std_conf": 0.06352201674409502, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2080.0, "completions/max_terminated_length": 2080.0, "completions/mean_length": 299.15625, "completions/mean_terminated_length": 300.3294372558594, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.12266666666666666, "grad_norm": 0.011229630559682846, "learning_rate": 2.361111111111111e-06, "loss": 0.026, "num_tokens": 22206993.0, "reward": 1.223961591720581, "reward_std": 0.18679305911064148, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.4987046718597412, "rewards/format_reward_step": 0.98046875, "step": 115 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3685760064981878, "aux_distill/mean_u": 0.11651055035050567, "aux_distill/n_active_tok": 71.59375, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5117604187371629, "calib/avg_num_step_conf": 1.1171875, "calib/ece": 0.5143089430894309, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0034148280659908586, "calib/mean_conf": 0.01008130081300813, "calib/mu_c": 0.011705426356589149, "calib/mu_w": 0.00829059829059829, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0, "calib/std_conf": 0.029590592192035723, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 353.63671875, "completions/mean_terminated_length": 355.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 59.0, "epoch": 0.12373333333333333, "grad_norm": 0.008860284462571144, "learning_rate": 2.3333333333333336e-06, "loss": 0.1086, "num_tokens": 22402044.0, "reward": 1.2144132852554321, "reward_std": 0.23440314829349518, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.46398282051086426, "rewards/format_reward_step": 0.95703125, "step": 116 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3026649123057723, "aux_distill/mean_u": 0.07919790339599429, "aux_distill/n_active_tok": 63.96875, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4968918069427953, "calib/avg_num_step_conf": 0.99609375, "calib/ece": 0.4164876033057851, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008939722008800728, "calib/mean_conf": 0.01756198347107438, "calib/mu_c": 0.012427184466019418, "calib/mu_w": 0.021366906474820146, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.004214876033057851, "calib/std_conf": 0.06447696655318512, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2947.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 320.8828125, "completions/mean_terminated_length": 324.6877746582031, "completions/min_length": 0.0, "completions/min_terminated_length": 103.0, "epoch": 0.1248, "grad_norm": 0.011900389567017555, "learning_rate": 2.305555555555556e-06, "loss": 0.0455, "num_tokens": 22590790.0, "reward": 1.1454674005508423, "reward_std": 0.20846307277679443, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.5448409914970398, "rewards/format_reward_step": 0.94140625, "step": 117 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2679017949849367, "aux_distill/mean_u": 0.0878350996942736, "aux_distill/n_active_tok": 67.5625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5327660994327661, "calib/avg_num_step_conf": 1.05859375, "calib/ece": 0.53, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.008130081300813009, "calib/gap": 0.019511511511511516, "calib/mean_conf": 0.025121951219512193, "calib/mu_c": 0.03392592592592593, "calib/mu_w": 0.014414414414414415, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.96875, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.003170731707317074, "calib/std_conf": 0.11228495873075943, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3029.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 346.22265625, "completions/mean_terminated_length": 347.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.12586666666666665, "grad_norm": 0.01450740359723568, "learning_rate": 2.277777777777778e-06, "loss": 0.0959, "num_tokens": 22783431.0, "reward": 1.21885347366333, "reward_std": 0.24840456247329712, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.4416132867336273, "rewards/format_reward_step": 0.94140625, "step": 118 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32812713412567973, "aux_distill/mean_u": 0.12670988056552968, "aux_distill/n_active_tok": 62.75, "calib/answer_extract_rate": 0.8984375, "calib/auroc": 0.5259686102054743, "calib/avg_num_step_conf": 0.97265625, "calib/ece": 0.5122173913043478, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.004347826086956522, "calib/gap": 0.011715065584957156, "calib/mean_conf": 0.013869565217391307, "calib/mu_c": 0.01942148760330578, "calib/mu_w": 0.007706422018348625, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.0, "calib/std_conf": 0.068659718174567, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 362.19921875, "completions/mean_terminated_length": 365.0511779785156, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.12693333333333334, "grad_norm": 0.009933991357684135, "learning_rate": 2.25e-06, "loss": 0.1331, "num_tokens": 22981218.0, "reward": 1.1378350257873535, "reward_std": 0.33909130096435547, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.4358261823654175, "rewards/format_reward_step": 0.89453125, "step": 119 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2903097663074732, "aux_distill/mean_u": 0.09879584451015042, "aux_distill/n_active_tok": 65.34375, "calib/answer_extract_rate": 0.91015625, "calib/auroc": 0.5062885802469136, "calib/avg_num_step_conf": 1.01171875, "calib/ece": 0.5657575757575758, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.012987012987012988, "calib/gap": 0.010914351851851852, "calib/mean_conf": 0.027316017316017318, "calib/mu_c": 0.03185185185185185, "calib/mu_w": 0.0209375, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.00432900432900433, "calib/std_conf": 0.12327310915368074, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2181.0, "completions/max_terminated_length": 2181.0, "completions/mean_length": 306.7109375, "completions/mean_terminated_length": 306.7109375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.128, "grad_norm": 0.011160912923514843, "learning_rate": 2.222222222222222e-06, "loss": 0.0877, "num_tokens": 23166424.0, "reward": 1.1697453260421753, "reward_std": 0.26618820428848267, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.3824594020843506, "rewards/format_reward_step": 0.88671875, "step": 120 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.324223575880751, "aux_distill/mean_u": 0.08532104738627432, "aux_distill/n_active_tok": 65.875, "calib/answer_extract_rate": 0.8828125, "calib/auroc": 0.5099842767295597, "calib/avg_num_step_conf": 1.015625, "calib/ece": 0.5158407079646018, "calib/final_conf_rate": 0.8828125, "calib/format_rate": 0.8828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002559748427672955, "calib/mean_conf": 0.015132743362831859, "calib/mu_c": 0.01633333333333333, "calib/mu_w": 0.013773584905660377, "calib/nonempty_final_conf_rate": 0.8828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05632734673157518, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2771.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 323.62109375, "completions/mean_terminated_length": 323.62109375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.12906666666666666, "grad_norm": 0.012316557578742504, "learning_rate": 2.1944444444444445e-06, "loss": 0.0656, "num_tokens": 23354327.0, "reward": 1.1233421564102173, "reward_std": 0.32146894931793213, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.42637187242507935, "rewards/format_reward_step": 0.8828125, "step": 121 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.345096837496385, "aux_distill/mean_u": 0.1192782161457965, "aux_distill/n_active_tok": 65.125, "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.5341607565011821, "calib/avg_num_step_conf": 1.0078125, "calib/ece": 0.5942419913419914, "calib/final_conf_rate": 0.90234375, "calib/format_rate": 0.8984375, "calib/frac_conf_gt_0.9": 0.004329004329004329, "calib/gap": 0.010071631205673764, "calib/mean_conf": 0.01614761904761905, "calib/mu_c": 0.020071631205673762, "calib/mu_w": 0.009999999999999998, "calib/nonempty_final_conf_rate": 0.90234375, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.07051663941031937, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2089.0, "completions/max_terminated_length": 2089.0, "completions/mean_length": 264.42578125, "completions/mean_terminated_length": 267.561279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.13013333333333332, "grad_norm": 0.01495366357266903, "learning_rate": 2.166666666666667e-06, "loss": 0.0432, "num_tokens": 23529364.0, "reward": 1.1825222969055176, "reward_std": 0.26760587096214294, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.3650445342063904, "rewards/format_reward_step": 0.8984375, "step": 122 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35667229630053043, "aux_distill/mean_u": 0.1399172233377629, "aux_distill/n_active_tok": 71.625, "calib/answer_extract_rate": 0.8828125, "calib/auroc": 0.5353483606557377, "calib/avg_num_step_conf": 1.10546875, "calib/ece": 0.4516814159292035, "calib/final_conf_rate": 0.8828125, "calib/format_rate": 0.8828125, "calib/frac_conf_gt_0.9": 0.017699115044247787, "calib/gap": 0.000890605296342991, "calib/mean_conf": 0.03, "calib/mu_c": 0.03048076923076922, "calib/mu_w": 0.02959016393442623, "calib/nonempty_final_conf_rate": 0.8828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.010752212389380532, "calib/std_conf": 0.13212691672741708, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2506.0, "completions/max_terminated_length": 2506.0, "completions/mean_length": 330.60546875, "completions/mean_terminated_length": 331.9019775390625, "completions/min_length": 0.0, "completions/min_terminated_length": 120.0, "epoch": 0.1312, "grad_norm": 0.012956635095179081, "learning_rate": 2.138888888888889e-06, "loss": 0.1546, "num_tokens": 23719287.0, "reward": 1.090217113494873, "reward_std": 0.332817405462265, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.48512187600135803, "rewards/format_reward_step": 0.8828125, "step": 123 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.303095584269613, "aux_distill/mean_u": 0.10875086624948242, "aux_distill/n_active_tok": 69.6875, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.4850164203612479, "calib/avg_num_step_conf": 1.0859375, "calib/ece": 0.48252066115702474, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.02066115702479339, "calib/gap": -0.024482758620689653, "calib/mean_conf": 0.03326446280991736, "calib/mu_c": 0.02051724137931034, "calib/mu_w": 0.04499999999999999, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01822314049586777, "calib/std_conf": 0.14261027448960573, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 596.0, "completions/max_terminated_length": 596.0, "completions/mean_length": 271.0859375, "completions/mean_terminated_length": 272.1490478515625, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.13226666666666667, "grad_norm": 0.01391842681914568, "learning_rate": 2.1111111111111114e-06, "loss": 0.0442, "num_tokens": 23895501.0, "reward": 1.1690056324005127, "reward_std": 0.2866358757019043, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.49035507440567017, "rewards/format_reward_step": 0.94140625, "step": 124 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.29174726200290024, "aux_distill/mean_u": 0.11426094989438287, "aux_distill/n_active_tok": 73.5, "calib/answer_extract_rate": 0.91796875, "calib/auroc": 0.5308159722222223, "calib/avg_num_step_conf": 1.140625, "calib/ece": 0.438135593220339, "calib/final_conf_rate": 0.921875, "calib/format_rate": 0.91796875, "calib/frac_conf_gt_0.9": 0.0211864406779661, "calib/gap": 0.018342013888888894, "calib/mean_conf": 0.036440677966101696, "calib/mu_c": 0.046388888888888896, "calib/mu_w": 0.028046875000000002, "calib/nonempty_final_conf_rate": 0.921875, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.00847457627118644, "calib/std_conf": 0.1446710202770926, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1559.0, "completions/max_terminated_length": 1559.0, "completions/mean_length": 266.5859375, "completions/mean_terminated_length": 270.8174743652344, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.13333333333333333, "grad_norm": 0.013951667584478855, "learning_rate": 2.0833333333333334e-06, "loss": 0.0304, "num_tokens": 24068555.0, "reward": 1.1382172107696533, "reward_std": 0.23240628838539124, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5147156119346619, "rewards/format_reward_step": 0.91796875, "step": 125 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36404131818562746, "aux_distill/mean_u": 0.13878008918581453, "aux_distill/n_active_tok": 71.1875, "calib/answer_extract_rate": 0.89453125, "calib/auroc": 0.506964217006541, "calib/avg_num_step_conf": 1.1015625, "calib/ece": 0.4953947368421052, "calib/final_conf_rate": 0.890625, "calib/format_rate": 0.890625, "calib/frac_conf_gt_0.9": 0.017543859649122806, "calib/gap": -0.01719969218930357, "calib/mean_conf": 0.035307017543859634, "calib/mu_c": 0.026782608695652174, "calib/mu_w": 0.04398230088495574, "calib/nonempty_final_conf_rate": 0.890625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.013157894736842105, "calib/std_conf": 0.1323916059241489, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2965.0, "completions/max_terminated_length": 2965.0, "completions/mean_length": 292.92578125, "completions/mean_terminated_length": 294.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.1344, "grad_norm": 0.013217564672231674, "learning_rate": 2.0555555555555555e-06, "loss": 0.1714, "num_tokens": 24249008.0, "reward": 1.1189053058624268, "reward_std": 0.3188907504081726, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.44874805212020874, "rewards/format_reward_step": 0.890625, "step": 126 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3291368675418198, "aux_distill/mean_u": 0.11664523604299605, "aux_distill/n_active_tok": 66.5, "calib/answer_extract_rate": 0.8984375, "calib/auroc": 0.5259813932380304, "calib/avg_num_step_conf": 1.03125, "calib/ece": 0.47982608695652174, "calib/final_conf_rate": 0.8984375, "calib/format_rate": 0.89453125, "calib/frac_conf_gt_0.9": 0.0391304347826087, "calib/gap": 0.026621284320399362, "calib/mean_conf": 0.05486956521739131, "calib/mu_c": 0.06794871794871794, "calib/mu_w": 0.04132743362831858, "calib/nonempty_final_conf_rate": 0.8984375, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9609375, "calib/pce": 0.013000000000000005, "calib/std_conf": 0.19233966882616488, "calib/step_conf_rate": 0.9609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2925.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 293.3359375, "completions/mean_terminated_length": 296.8142395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.13546666666666668, "grad_norm": 0.012097050435841084, "learning_rate": 2.027777777777778e-06, "loss": 0.1988, "num_tokens": 24427774.0, "reward": 1.138083577156067, "reward_std": 0.3193005919456482, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.46757346391677856, "rewards/format_reward_step": 0.89453125, "step": 127 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3628509286791086, "aux_distill/mean_u": 0.13717759212656766, "aux_distill/n_active_tok": 68.78125, "calib/answer_extract_rate": 0.8828125, "calib/auroc": 0.50538571202281, "calib/avg_num_step_conf": 1.0625, "calib/ece": 0.4724888888888889, "calib/final_conf_rate": 0.87890625, "calib/format_rate": 0.87890625, "calib/frac_conf_gt_0.9": 0.017777777777777778, "calib/gap": -0.016759068588626646, "calib/mean_conf": 0.03355555555555556, "calib/mu_c": 0.02476635514018691, "calib/mu_w": 0.04152542372881356, "calib/nonempty_final_conf_rate": 0.87890625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.015244444444444444, "calib/std_conf": 0.13192852535547092, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2780.0, "completions/max_terminated_length": 2780.0, "completions/mean_length": 279.1953125, "completions/mean_terminated_length": 280.29022216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.13653333333333334, "grad_norm": 0.01438905205577612, "learning_rate": 2.0000000000000003e-06, "loss": 0.0887, "num_tokens": 24605912.0, "reward": 1.0940048694610596, "reward_std": 0.27575862407684326, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.46535351872444153, "rewards/format_reward_step": 0.87890625, "step": 128 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3082141927443445, "aux_distill/mean_u": 0.11286577517381002, "aux_distill/n_active_tok": 70.15625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4738412698412699, "calib/avg_num_step_conf": 1.09375, "calib/ece": 0.49418326693227094, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.06374501992031872, "calib/gap": -0.017893333333333372, "calib/mean_conf": 0.08557768924302789, "calib/mu_c": 0.07666666666666665, "calib/mu_w": 0.09456000000000002, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03888446215139442, "calib/std_conf": 0.2447145766889721, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 776.0, "completions/max_terminated_length": 776.0, "completions/mean_length": 226.61328125, "completions/mean_terminated_length": 226.61328125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.1376, "grad_norm": 0.014954674988985062, "learning_rate": 1.9722222222222224e-06, "loss": 0.051, "num_tokens": 24766309.0, "reward": 1.2313487529754639, "reward_std": 0.21718278527259827, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.49785393476486206, "rewards/format_reward_step": 0.98046875, "step": 129 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31981189316138625, "aux_distill/mean_u": 0.11389575257731732, "aux_distill/n_active_tok": 70.53125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.48054774340431233, "calib/avg_num_step_conf": 1.09765625, "calib/ece": 0.47861224489795906, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.04897959183673469, "calib/gap": 0.05308490692379804, "calib/mean_conf": 0.07461224489795919, "calib/mu_c": 0.09931297709923663, "calib/mu_w": 0.04622807017543859, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009265306122448977, "calib/std_conf": 0.2182367948481023, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1839.0, "completions/max_terminated_length": 1839.0, "completions/mean_length": 228.3671875, "completions/mean_terminated_length": 229.26275634765625, "completions/min_length": 0.0, "completions/min_terminated_length": 74.0, "epoch": 0.13866666666666666, "grad_norm": 0.015271571464836597, "learning_rate": 1.944444444444445e-06, "loss": 0.084, "num_tokens": 24930059.0, "reward": 1.2382566928863525, "reward_std": 0.19215700030326843, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.4960445463657379, "rewards/format_reward_step": 0.95703125, "step": 130 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3160778861492872, "aux_distill/mean_u": 0.10070666133135303, "aux_distill/n_active_tok": 74.0625, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4863674496644296, "calib/avg_num_step_conf": 1.15625, "calib/ece": 0.38048979591836735, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0653061224489796, "calib/gap": 0.011264681208053712, "calib/mean_conf": 0.08783673469387754, "calib/mu_c": 0.09468750000000002, "calib/mu_w": 0.08342281879194631, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.03824489795918368, "calib/std_conf": 0.24457671447381732, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1911.0, "completions/max_terminated_length": 1911.0, "completions/mean_length": 234.828125, "completions/mean_terminated_length": 237.61265563964844, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.13973333333333332, "grad_norm": 0.015647603198885918, "learning_rate": 1.916666666666667e-06, "loss": 0.0495, "num_tokens": 25096383.0, "reward": 1.1477234363555908, "reward_std": 0.19904610514640808, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.5884156227111816, "rewards/format_reward_step": 0.95703125, "step": 131 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31305989855900407, "aux_distill/mean_u": 0.14130636641858182, "aux_distill/n_active_tok": 71.90625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4506064162754303, "calib/avg_num_step_conf": 1.12109375, "calib/ece": 0.55212, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.1, "calib/gap": -0.04311163275952008, "calib/mean_conf": 0.12292, "calib/mu_c": 0.10429577464788732, "calib/mu_w": 0.1474074074074074, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.053520000000000005, "calib/std_conf": 0.29644944526849765, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 632.0, "completions/max_terminated_length": 632.0, "completions/mean_length": 221.77734375, "completions/mean_terminated_length": 222.64707946777344, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.1408, "grad_norm": 0.015256309881806374, "learning_rate": 1.888888888888889e-06, "loss": 0.0557, "num_tokens": 25258750.0, "reward": 1.2594385147094727, "reward_std": 0.2623821496963501, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.43684569001197815, "rewards/format_reward_step": 0.97265625, "step": 132 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3371236342936754, "aux_distill/mean_u": 0.10641052422516785, "aux_distill/n_active_tok": 72.28125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5029411764705882, "calib/avg_num_step_conf": 1.125, "calib/ece": 0.3203643724696356, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.07692307692307693, "calib/gap": 0.005825821237585918, "calib/mean_conf": 0.09663967611336033, "calib/mu_c": 0.10064935064935063, "calib/mu_w": 0.09482352941176471, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.052631578947368425, "calib/std_conf": 0.26286271205587297, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 498.0, "completions/max_terminated_length": 498.0, "completions/mean_length": 234.28515625, "completions/mean_terminated_length": 234.28515625, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.14186666666666667, "grad_norm": 0.015624146908521652, "learning_rate": 1.8611111111111113e-06, "loss": 0.0568, "num_tokens": 25425071.0, "reward": 1.1076685190200806, "reward_std": 0.26900923252105713, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.6489308476448059, "rewards/format_reward_step": 0.96484375, "step": 133 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33154400484636426, "aux_distill/mean_u": 0.13351008039308454, "aux_distill/n_active_tok": 75.28125, "calib/answer_extract_rate": 0.93359375, "calib/auroc": 0.5581487914314662, "calib/avg_num_step_conf": 1.16796875, "calib/ece": 0.373305439330544, "calib/final_conf_rate": 0.93359375, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.07112970711297072, "calib/gap": 0.0583550441453177, "calib/mean_conf": 0.09506276150627616, "calib/mu_c": 0.12948979591836735, "calib/mu_w": 0.07113475177304965, "calib/nonempty_final_conf_rate": 0.93359375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.029163179916317988, "calib/std_conf": 0.25451713293291445, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1154.0, "completions/max_terminated_length": 1154.0, "completions/mean_length": 244.79296875, "completions/mean_terminated_length": 245.75296020507812, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.14293333333333333, "grad_norm": 0.015033029951155186, "learning_rate": 1.8333333333333333e-06, "loss": 0.1103, "num_tokens": 25596690.0, "reward": 1.138121247291565, "reward_std": 0.25055497884750366, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.5809300541877747, "rewards/format_reward_step": 0.9296875, "step": 134 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.32621747767552733, "aux_distill/mean_u": 0.09390474473256447, "aux_distill/n_active_tok": 70.21875, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.5159611380985427, "calib/avg_num_step_conf": 1.08984375, "calib/ece": 0.4307053941908714, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.07883817427385892, "calib/gap": 0.03173976405274115, "calib/mean_conf": 0.09883817427385892, "calib/mu_c": 0.11609090909090909, "calib/mu_w": 0.08435114503816794, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03655601659751036, "calib/std_conf": 0.2665257859182153, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1175.0, "completions/max_terminated_length": 1175.0, "completions/mean_length": 223.390625, "completions/mean_terminated_length": 223.390625, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.144, "grad_norm": 0.017453555017709732, "learning_rate": 1.8055555555555557e-06, "loss": 0.0854, "num_tokens": 25759758.0, "reward": 1.1680976152420044, "reward_std": 0.2895016372203827, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5354140400886536, "rewards/format_reward_step": 0.94140625, "step": 135 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2697015115991235, "aux_distill/mean_u": 0.08167140688285077, "aux_distill/n_active_tok": 76.6875, "calib/answer_extract_rate": 0.94140625, "calib/auroc": 0.50177304964539, "calib/avg_num_step_conf": 1.19140625, "calib/ece": 0.3858506224066391, "calib/final_conf_rate": 0.94140625, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.058091286307053944, "calib/gap": 0.03815035460992909, "calib/mean_conf": 0.07887966804979252, "calib/mu_c": 0.10120000000000001, "calib/mu_w": 0.06304964539007092, "calib/nonempty_final_conf_rate": 0.94140625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.024896265560165977, "calib/std_conf": 0.23191783412618772, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2800.0, "completions/max_terminated_length": 2800.0, "completions/mean_length": 220.23828125, "completions/mean_terminated_length": 220.23828125, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.14506666666666668, "grad_norm": 0.015763340517878532, "learning_rate": 1.777777777777778e-06, "loss": 0.056, "num_tokens": 25924627.0, "reward": 1.1440978050231934, "reward_std": 0.2571519613265991, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.5694457292556763, "rewards/format_reward_step": 0.9375, "step": 136 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3393157171085477, "aux_distill/mean_u": 0.13486390960074313, "aux_distill/n_active_tok": 79.1875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.45054421768707487, "calib/avg_num_step_conf": 1.234375, "calib/ece": 0.39708502024291503, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.08097165991902834, "calib/gap": 0.01586666666666668, "calib/mean_conf": 0.0997570850202429, "calib/mu_c": 0.1092, "calib/mu_w": 0.09333333333333332, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0459919028340081, "calib/std_conf": 0.2703363134483246, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2702.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 226.609375, "completions/mean_terminated_length": 226.609375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.14613333333333334, "grad_norm": 0.014484074898064137, "learning_rate": 1.75e-06, "loss": 0.1431, "num_tokens": 26089623.0, "reward": 1.1627554893493652, "reward_std": 0.2251381278038025, "rewards/accuracy_reward_step": 0.390625, "rewards/final_brier_reward_step": 0.5794172286987305, "rewards/format_reward_step": 0.96484375, "step": 137 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.30941454344429076, "aux_distill/mean_u": 0.09865573523816851, "aux_distill/n_active_tok": 76.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5532268170426065, "calib/avg_num_step_conf": 1.1953125, "calib/ece": 0.47723320158102756, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.09090909090909091, "calib/gap": 0.0443496240601504, "calib/mean_conf": 0.11881422924901183, "calib/mu_c": 0.1398496240601504, "calib/mu_w": 0.09549999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03517786561264821, "calib/std_conf": 0.2908638100607603, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1248.0, "completions/max_terminated_length": 1248.0, "completions/mean_length": 194.859375, "completions/mean_terminated_length": 194.859375, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.1472, "grad_norm": 0.018135685473680496, "learning_rate": 1.7222222222222224e-06, "loss": 0.0513, "num_tokens": 26243843.0, "reward": 1.27192223072052, "reward_std": 0.2641417384147644, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5165007710456848, "rewards/format_reward_step": 0.98828125, "step": 138 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3026889590546489, "aux_distill/mean_u": 0.12891794165425197, "aux_distill/n_active_tok": 73.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5180472440944881, "calib/avg_num_step_conf": 1.1484375, "calib/ece": 0.4740873015873016, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.06746031746031746, "calib/gap": 0.006313700787401566, "calib/mean_conf": 0.09273809523809524, "calib/mu_c": 0.09591999999999999, "calib/mu_w": 0.08960629921259843, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0353968253968254, "calib/std_conf": 0.25348844495036854, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 378.0, "completions/max_terminated_length": 378.0, "completions/mean_length": 187.58984375, "completions/mean_terminated_length": 188.32550048828125, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.14826666666666666, "grad_norm": 0.017611773684620857, "learning_rate": 1.6944444444444446e-06, "loss": 0.0482, "num_tokens": 26394962.0, "reward": 1.239492416381836, "reward_std": 0.2608584761619568, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5180472731590271, "rewards/format_reward_step": 0.984375, "step": 139 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3559173212852329, "aux_distill/mean_u": 0.12057362805447201, "aux_distill/n_active_tok": 81.96875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5053320561941252, "calib/avg_num_step_conf": 1.27734375, "calib/ece": 0.4995219123505977, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05179282868525897, "calib/gap": 0.031229885057471263, "calib/mean_conf": 0.0749003984063745, "calib/mu_c": 0.08933333333333333, "calib/mu_w": 0.05810344827586207, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.018286852589641432, "calib/std_conf": 0.2229347224591291, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1865.0, "completions/max_terminated_length": 1865.0, "completions/mean_length": 210.265625, "completions/mean_terminated_length": 210.265625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.14933333333333335, "grad_norm": 0.017557280138134956, "learning_rate": 1.6666666666666667e-06, "loss": 0.0716, "num_tokens": 26553806.0, "reward": 1.2641351222991943, "reward_std": 0.21921595931053162, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.49311408400535583, "rewards/format_reward_step": 0.98046875, "step": 140 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.325836184900254, "aux_distill/mean_u": 0.1298084705460597, "aux_distill/n_active_tok": 89.15625, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5169692453597075, "calib/avg_num_step_conf": 1.38671875, "calib/ece": 0.5068312757201645, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.06995884773662552, "calib/gap": -0.044848936458474445, "calib/mean_conf": 0.09094650205761316, "calib/mu_c": 0.06842975206611569, "calib/mu_w": 0.11327868852459014, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04991769547325103, "calib/std_conf": 0.25741739099632116, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 721.0, "completions/max_terminated_length": 721.0, "completions/mean_length": 215.8984375, "completions/mean_terminated_length": 215.8984375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.1504, "grad_norm": 0.016918949782848358, "learning_rate": 1.638888888888889e-06, "loss": 0.0954, "num_tokens": 26716172.0, "reward": 1.1825156211853027, "reward_std": 0.29578882455825806, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.47049999237060547, "rewards/format_reward_step": 0.94921875, "step": 141 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38834019796922803, "aux_distill/mean_u": 0.17126695928741237, "aux_distill/n_active_tok": 83.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4985401459854015, "calib/avg_num_step_conf": 1.3046875, "calib/ece": 0.4508730158730158, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": -0.022569343065693442, "calib/mean_conf": 0.07626984126984127, "calib/mu_c": 0.064, "calib/mu_w": 0.08656934306569344, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03539682539682539, "calib/std_conf": 0.22101940634102674, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 709.0, "completions/max_terminated_length": 709.0, "completions/mean_length": 201.75, "completions/mean_terminated_length": 201.75, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.15146666666666667, "grad_norm": 0.019056685268878937, "learning_rate": 1.6111111111111113e-06, "loss": 0.0345, "num_tokens": 26872980.0, "reward": 1.2108287811279297, "reward_std": 0.21176810562610626, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5349390506744385, "rewards/format_reward_step": 0.98046875, "step": 142 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3469718978740275, "aux_distill/mean_u": 0.11716029338227302, "aux_distill/n_active_tok": 98.71875, "calib/answer_extract_rate": 0.9296875, "calib/auroc": 0.42581805106076953, "calib/avg_num_step_conf": 1.53515625, "calib/ece": 0.461218487394958, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.9296875, "calib/frac_conf_gt_0.9": 0.07563025210084033, "calib/gap": -0.07309385113268607, "calib/mean_conf": 0.09903361344537816, "calib/mu_c": 0.05757281553398059, "calib/mu_w": 0.13066666666666665, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.06373949579831932, "calib/std_conf": 0.26121787914065403, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2387.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 243.36328125, "completions/mean_terminated_length": 243.36328125, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.15253333333333333, "grad_norm": 0.015829890966415405, "learning_rate": 1.5833333333333333e-06, "loss": 0.1274, "num_tokens": 27042617.0, "reward": 1.1177458763122559, "reward_std": 0.28702062368392944, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.5011168122291565, "rewards/format_reward_step": 0.9296875, "step": 143 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3681814162991941, "aux_distill/mean_u": 0.12000292491714179, "aux_distill/n_active_tok": 91.28125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5029694381422661, "calib/avg_num_step_conf": 1.421875, "calib/ece": 0.4917959183673469, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.07346938775510205, "calib/gap": 0.00044174562925396177, "calib/mean_conf": 0.09955102040816327, "calib/mu_c": 0.09976377952755905, "calib/mu_w": 0.09932203389830509, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03648979591836735, "calib/std_conf": 0.26153901345723407, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2921.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 212.92578125, "completions/mean_terminated_length": 213.76080322265625, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.1536, "grad_norm": 0.01637650653719902, "learning_rate": 1.5555555555555558e-06, "loss": 0.1096, "num_tokens": 27201254.0, "reward": 1.2170963287353516, "reward_std": 0.25823676586151123, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.48497384786605835, "rewards/format_reward_step": 0.95703125, "step": 144 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.2961839628405869, "aux_distill/mean_u": 0.11787739525770995, "aux_distill/n_active_tok": 81.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.461596484323757, "calib/avg_num_step_conf": 1.27734375, "calib/ece": 0.5660865612648222, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": 0.014169264069264068, "calib/mean_conf": 0.0700399209486166, "calib/mu_c": 0.07558441558441559, "calib/mu_w": 0.06141515151515152, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01371541501976284, "calib/std_conf": 0.2013873581212608, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 616.0, "completions/max_terminated_length": 616.0, "completions/mean_length": 187.5390625, "completions/mean_terminated_length": 188.27452087402344, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.15466666666666667, "grad_norm": 0.01805811934173107, "learning_rate": 1.527777777777778e-06, "loss": 0.0142, "num_tokens": 27351968.0, "reward": 1.3120663166046143, "reward_std": 0.20374266803264618, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.43272656202316284, "rewards/format_reward_step": 0.98828125, "step": 145 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36236625676974654, "aux_distill/mean_u": 0.09993783496658051, "aux_distill/n_active_tok": 94.34375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5411502576242864, "calib/avg_num_step_conf": 1.4765625, "calib/ece": 0.30735138339920953, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.05928853754940711, "calib/gap": 0.06291440607157776, "calib/mean_conf": 0.08917035573122531, "calib/mu_c": 0.1306988372093023, "calib/mu_w": 0.06778443113772455, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.028300395256916994, "calib/std_conf": 0.23328645191461295, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 540.0, "completions/max_terminated_length": 540.0, "completions/mean_length": 202.4453125, "completions/mean_terminated_length": 203.23922729492188, "completions/min_length": 0.0, "completions/min_terminated_length": 76.0, "epoch": 0.15573333333333333, "grad_norm": 0.01728767901659012, "learning_rate": 1.5e-06, "loss": 0.0343, "num_tokens": 27511010.0, "reward": 1.169335126876831, "reward_std": 0.22755557298660278, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.6785140633583069, "rewards/format_reward_step": 0.98828125, "step": 146 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35689425515010953, "aux_distill/mean_u": 0.13463799166264762, "aux_distill/n_active_tok": 102.34375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.48808092948717946, "calib/avg_num_step_conf": 1.59765625, "calib/ece": 0.3636904761904761, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.01811698717948719, "calib/mean_conf": 0.08805555555555554, "calib/mu_c": 0.09927083333333335, "calib/mu_w": 0.08115384615384616, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0353968253968254, "calib/std_conf": 0.23207609608466293, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2326.0, "completions/max_terminated_length": 2326.0, "completions/mean_length": 221.4296875, "completions/mean_terminated_length": 221.4296875, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.1568, "grad_norm": 0.016975658014416695, "learning_rate": 1.4722222222222225e-06, "loss": 0.0652, "num_tokens": 27671376.0, "reward": 1.1728016138076782, "reward_std": 0.2246793806552887, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6190406084060669, "rewards/format_reward_step": 0.9765625, "step": 147 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38051269575953484, "aux_distill/mean_u": 0.16149938439742753, "aux_distill/n_active_tok": 92.96875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5304027632950991, "calib/avg_num_step_conf": 1.44921875, "calib/ece": 0.5001204819277109, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.03614457831325301, "calib/gap": 0.037861704900938487, "calib/mean_conf": 0.06734939759036146, "calib/mu_c": 0.08437956204379564, "calib/mu_w": 0.04651785714285715, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008634538152610442, "calib/std_conf": 0.19164109068829516, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 927.0, "completions/max_terminated_length": 927.0, "completions/mean_length": 201.98046875, "completions/mean_terminated_length": 201.98046875, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.15786666666666666, "grad_norm": 0.018086962401866913, "learning_rate": 1.4444444444444445e-06, "loss": 0.0778, "num_tokens": 27828195.0, "reward": 1.2653236389160156, "reward_std": 0.25430917739868164, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.48767852783203125, "rewards/format_reward_step": 0.97265625, "step": 148 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35395267652347684, "aux_distill/mean_u": 0.13261109607192345, "aux_distill/n_active_tok": 134.15625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5235483007760235, "calib/avg_num_step_conf": 2.09375, "calib/ece": 0.38477911646586344, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.04417670682730924, "calib/gap": 0.00790272946213541, "calib/mean_conf": 0.07827309236947792, "calib/mu_c": 0.08297029702970297, "calib/mu_w": 0.07506756756756756, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.028714859437751, "calib/std_conf": 0.2090536856715205, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 253.02734375, "completions/mean_terminated_length": 253.02734375, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.15893333333333334, "grad_norm": 0.013964850455522537, "learning_rate": 1.4166666666666667e-06, "loss": 0.079, "num_tokens": 27997426.0, "reward": 1.1745210886001587, "reward_std": 0.2611221969127655, "rewards/accuracy_reward_step": 0.39453125, "rewards/final_brier_reward_step": 0.5912296772003174, "rewards/format_reward_step": 0.96875, "step": 149 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36969612818211317, "aux_distill/mean_u": 0.1544741904551301, "aux_distill/n_active_tok": 101.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5018785415126878, "calib/avg_num_step_conf": 1.58984375, "calib/ece": 0.4910588235294118, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": -0.0012176644493717692, "calib/mean_conf": 0.05945098039215686, "calib/mu_c": 0.05886363636363637, "calib/mu_w": 0.06008130081300814, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01643137254901961, "calib/std_conf": 0.17386182407759287, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 644.0, "completions/max_terminated_length": 644.0, "completions/mean_length": 197.3671875, "completions/mean_terminated_length": 198.1411895751953, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.16, "grad_norm": 0.015584699809551239, "learning_rate": 1.3888888888888892e-06, "loss": 0.0096, "num_tokens": 28152912.0, "reward": 1.2674424648284912, "reward_std": 0.15812954306602478, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5075414180755615, "rewards/format_reward_step": 0.99609375, "step": 150 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36593104852363467, "aux_distill/mean_u": 0.15258560407885197, "aux_distill/n_active_tok": 96.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5031104730653542, "calib/avg_num_step_conf": 1.51171875, "calib/ece": 0.3605577689243028, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": -0.03334974022422751, "calib/mean_conf": 0.05243027888446216, "calib/mu_c": 0.03130434782608695, "calib/mu_w": 0.06465408805031446, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.023227091633466136, "calib/std_conf": 0.15073283837724566, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1860.0, "completions/max_terminated_length": 1860.0, "completions/mean_length": 225.625, "completions/mean_terminated_length": 226.5098114013672, "completions/min_length": 0.0, "completions/min_terminated_length": 75.0, "epoch": 0.16106666666666666, "grad_norm": 0.015279371291399002, "learning_rate": 1.3611111111111112e-06, "loss": 0.0523, "num_tokens": 28317696.0, "reward": 1.1589202880859375, "reward_std": 0.21208204329013824, "rewards/accuracy_reward_step": 0.359375, "rewards/final_brier_reward_step": 0.618621826171875, "rewards/format_reward_step": 0.98046875, "step": 151 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39372802060097456, "aux_distill/mean_u": 0.1188828538735574, "aux_distill/n_active_tok": 93.15625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5239783856805134, "calib/avg_num_step_conf": 1.453125, "calib/ece": 0.4027235772357724, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.032520325203252036, "calib/gap": -0.005854103343465061, "calib/mean_conf": 0.07126016260162601, "calib/mu_c": 0.0679047619047619, "calib/mu_w": 0.07375886524822696, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.02357723577235773, "calib/std_conf": 0.18500834247937423, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 228.0078125, "completions/mean_terminated_length": 228.0078125, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "epoch": 0.16213333333333332, "grad_norm": 0.015285255387425423, "learning_rate": 1.3333333333333334e-06, "loss": 0.1525, "num_tokens": 28481458.0, "reward": 1.174981713294983, "reward_std": 0.23847061395645142, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5687136650085449, "rewards/format_reward_step": 0.9609375, "step": 152 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4158263560384512, "aux_distill/mean_u": 0.185887581378134, "aux_distill/n_active_tok": 101.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5082277694982613, "calib/avg_num_step_conf": 1.578125, "calib/ece": 0.4423224409448819, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.01968503937007874, "calib/gap": 0.023963474913065073, "calib/mean_conf": 0.052874409448818896, "calib/mu_c": 0.06532786885245902, "calib/mu_w": 0.041364393939393944, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.007440944881889762, "calib/std_conf": 0.14663943411123173, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 743.0, "completions/max_terminated_length": 743.0, "completions/mean_length": 218.94140625, "completions/mean_terminated_length": 219.80001831054688, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.1632, "grad_norm": 0.01481254305690527, "learning_rate": 1.3055555555555556e-06, "loss": 0.0337, "num_tokens": 28644827.0, "reward": 1.249547004699707, "reward_std": 0.18368719518184662, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5537816286087036, "rewards/format_reward_step": 0.9921875, "step": 153 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.350020554382354, "aux_distill/mean_u": 0.1530069324122927, "aux_distill/n_active_tok": 91.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5121492346938775, "calib/avg_num_step_conf": 1.43359375, "calib/ece": 0.43182539682539683, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": -0.007178571428571423, "calib/mean_conf": 0.07523809523809524, "calib/mu_c": 0.07125000000000001, "calib/mu_w": 0.07842857142857143, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03130952380952381, "calib/std_conf": 0.21098677565462198, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 725.0, "completions/max_terminated_length": 725.0, "completions/mean_length": 201.703125, "completions/mean_terminated_length": 202.49412536621094, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.16426666666666667, "grad_norm": 0.016052914783358574, "learning_rate": 1.2777777777777779e-06, "loss": 0.0341, "num_tokens": 28800903.0, "reward": 1.209600806236267, "reward_std": 0.21740567684173584, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5598265528678894, "rewards/format_reward_step": 0.984375, "step": 154 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3580281659960747, "aux_distill/mean_u": 0.14145642484386875, "aux_distill/n_active_tok": 103.09375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4906623026569118, "calib/avg_num_step_conf": 1.609375, "calib/ece": 0.40308300395256913, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.03557312252964427, "calib/gap": -0.008018867924528314, "calib/mean_conf": 0.0666403162055336, "calib/mu_c": 0.06198113207547169, "calib/mu_w": 0.07, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.025375494071146247, "calib/std_conf": 0.18552611088804694, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 619.0, "completions/max_terminated_length": 619.0, "completions/mean_length": 194.28515625, "completions/mean_terminated_length": 195.0470733642578, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.16533333333333333, "grad_norm": 0.01730683632194996, "learning_rate": 1.25e-06, "loss": 0.0453, "num_tokens": 28957856.0, "reward": 1.2017738819122314, "reward_std": 0.21324856579303741, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5871413946151733, "rewards/format_reward_step": 0.98828125, "step": 155 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36394436936825514, "aux_distill/mean_u": 0.15244325122522395, "aux_distill/n_active_tok": 121.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5520793222949558, "calib/avg_num_step_conf": 1.89453125, "calib/ece": 0.37584980237154153, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.02766798418972332, "calib/gap": 0.0351219355666795, "calib/mean_conf": 0.05893280632411067, "calib/mu_c": 0.07933962264150944, "calib/mu_w": 0.04421768707482994, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007905138339920948, "calib/std_conf": 0.1648236793891626, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2092.0, "completions/max_terminated_length": 2092.0, "completions/mean_length": 241.1171875, "completions/mean_terminated_length": 242.06275939941406, "completions/min_length": 0.0, "completions/min_terminated_length": 71.0, "epoch": 0.1664, "grad_norm": 0.011360744014382362, "learning_rate": 1.2222222222222223e-06, "loss": 0.0355, "num_tokens": 29124342.0, "reward": 1.2130236625671387, "reward_std": 0.18996354937553406, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6096410155296326, "rewards/format_reward_step": 0.98828125, "step": 156 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.401463286485523, "aux_distill/mean_u": 0.21597904442972382, "aux_distill/n_active_tok": 127.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4743506493506494, "calib/avg_num_step_conf": 2.0078125, "calib/ece": 0.42352000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": -0.004194805194805196, "calib/mean_conf": 0.057440000000000005, "calib/mu_c": 0.05509090909090908, "calib/mu_w": 0.059285714285714275, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.02048, "calib/std_conf": 0.17260778197984006, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1851.0, "completions/max_terminated_length": 1851.0, "completions/mean_length": 228.9609375, "completions/mean_terminated_length": 232.59524536132812, "completions/min_length": 0.0, "completions/min_terminated_length": 57.0, "epoch": 0.16746666666666668, "grad_norm": 0.012861998751759529, "learning_rate": 1.1944444444444446e-06, "loss": 0.0008, "num_tokens": 29286684.0, "reward": 1.2028257846832275, "reward_std": 0.20734016597270966, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5619015693664551, "rewards/format_reward_step": 0.9765625, "step": 157 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3736504097469151, "aux_distill/mean_u": 0.16085979852261284, "aux_distill/n_active_tok": 116.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.60516899878602, "calib/avg_num_step_conf": 1.82421875, "calib/ece": 0.5218476190476191, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": 0.028447920260686217, "calib/mean_conf": 0.07735873015873017, "calib/mu_c": 0.08988936170212765, "calib/mu_w": 0.06144144144144143, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019841269841269837, "calib/std_conf": 0.21477512539229582, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2533.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 238.93359375, "completions/mean_terminated_length": 238.93359375, "completions/min_length": 70.0, "completions/min_terminated_length": 70.0, "epoch": 0.16853333333333334, "grad_norm": 0.013917519710958004, "learning_rate": 1.1666666666666668e-06, "loss": 0.0936, "num_tokens": 29453091.0, "reward": 1.283625841140747, "reward_std": 0.2249603569507599, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.4813140332698822, "rewards/format_reward_step": 0.984375, "step": 158 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.387286314740777, "aux_distill/mean_u": 0.15654495993202383, "aux_distill/n_active_tok": 123.28125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5469138102334823, "calib/avg_num_step_conf": 1.92578125, "calib/ece": 0.44444881889763777, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.021331346249379032, "calib/mean_conf": 0.05948818897637795, "calib/mu_c": 0.07057377049180327, "calib/mu_w": 0.04924242424242424, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.011811023622047246, "calib/std_conf": 0.17298256095899606, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1930.0, "completions/max_terminated_length": 1930.0, "completions/mean_length": 224.86328125, "completions/mean_terminated_length": 224.86328125, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.1696, "grad_norm": 0.013575537130236626, "learning_rate": 1.138888888888889e-06, "loss": 0.0541, "num_tokens": 29615440.0, "reward": 1.2475013732910156, "reward_std": 0.20087464153766632, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5496902465820312, "rewards/format_reward_step": 0.9921875, "step": 159 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40404290379956365, "aux_distill/mean_u": 0.16472119918915018, "aux_distill/n_active_tok": 120.34375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5437062937062938, "calib/avg_num_step_conf": 1.87890625, "calib/ece": 0.4110441767068273, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.05220883534136546, "calib/gap": 0.004880591107006227, "calib/mean_conf": 0.07795180722891565, "calib/mu_c": 0.08075471698113208, "calib/mu_w": 0.07587412587412586, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03164658634538152, "calib/std_conf": 0.22251623668645712, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2743.0, "completions/max_terminated_length": 2743.0, "completions/mean_length": 247.48828125, "completions/mean_terminated_length": 249.43701171875, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.17066666666666666, "grad_norm": 0.012900142930448055, "learning_rate": 1.111111111111111e-06, "loss": 0.0777, "num_tokens": 29783637.0, "reward": 1.1860899925231934, "reward_std": 0.21723809838294983, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5713988542556763, "rewards/format_reward_step": 0.97265625, "step": 160 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3169996486976743, "aux_distill/mean_u": 0.15129769459006281, "aux_distill/n_active_tok": 127.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4888630319148936, "calib/avg_num_step_conf": 1.984375, "calib/ece": 0.5971653543307086, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": -0.012497340425531897, "calib/mean_conf": 0.08, "calib/mu_c": 0.075375, "calib/mu_w": 0.0878723404255319, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.023622047244094488, "calib/std_conf": 0.22045407685048604, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2533.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 216.55859375, "completions/mean_terminated_length": 216.55859375, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "epoch": 0.17173333333333332, "grad_norm": 0.014349176548421383, "learning_rate": 1.0833333333333335e-06, "loss": 0.0883, "num_tokens": 29942996.0, "reward": 1.3284180164337158, "reward_std": 0.19347649812698364, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.4068359434604645, "rewards/format_reward_step": 0.9921875, "step": 161 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34928833367303014, "aux_distill/mean_u": 0.1428228857288194, "aux_distill/n_active_tok": 132.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4971402573768361, "calib/avg_num_step_conf": 2.0625, "calib/ece": 0.5887843137254902, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.050980392156862744, "calib/gap": -0.01196347328740413, "calib/mean_conf": 0.07396078431372549, "calib/mu_c": 0.06936305732484077, "calib/mu_w": 0.0813265306122449, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.023529411764705882, "calib/std_conf": 0.21804928034324803, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2439.0, "completions/max_terminated_length": 2439.0, "completions/mean_length": 230.4765625, "completions/mean_terminated_length": 230.4765625, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.1728, "grad_norm": 0.01401078887283802, "learning_rate": 1.0555555555555557e-06, "loss": 0.0889, "num_tokens": 30106142.0, "reward": 1.3188691139221191, "reward_std": 0.2151828408241272, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.41508203744888306, "rewards/format_reward_step": 0.99609375, "step": 162 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40522405598312616, "aux_distill/mean_u": 0.1774422503844015, "aux_distill/n_active_tok": 133.1875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5390625, "calib/avg_num_step_conf": 2.078125, "calib/ece": 0.4032258064516129, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.028225806451612902, "calib/gap": -0.0003739316239316087, "calib/mean_conf": 0.056370967741935486, "calib/mu_c": 0.05615384615384617, "calib/mu_w": 0.05652777777777778, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020120967741935482, "calib/std_conf": 0.17469020818164752, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2263.0, "completions/max_terminated_length": 2263.0, "completions/mean_length": 265.3125, "completions/mean_terminated_length": 265.3125, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.17386666666666667, "grad_norm": 0.011856562457978725, "learning_rate": 1.0277777777777777e-06, "loss": 0.1035, "num_tokens": 30278894.0, "reward": 1.178366780281067, "reward_std": 0.25111645460128784, "rewards/accuracy_reward_step": 0.40625, "rewards/final_brier_reward_step": 0.5754835605621338, "rewards/format_reward_step": 0.96875, "step": 163 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.38988359714858234, "aux_distill/mean_u": 0.16001530292552524, "aux_distill/n_active_tok": 142.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5338594064386318, "calib/avg_num_step_conf": 2.22265625, "calib/ece": 0.41811023622047244, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.027559055118110236, "calib/gap": 0.009088279678068412, "calib/mean_conf": 0.053937007874015744, "calib/mu_c": 0.05901785714285714, "calib/mu_w": 0.04992957746478873, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.015551181102362205, "calib/std_conf": 0.1687595199079184, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2365.0, "completions/max_terminated_length": 2365.0, "completions/mean_length": 248.578125, "completions/mean_terminated_length": 248.578125, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.17493333333333333, "grad_norm": 0.012431781738996506, "learning_rate": 1.0000000000000002e-06, "loss": 0.0477, "num_tokens": 30448666.0, "reward": 1.2250921726226807, "reward_std": 0.2109067142009735, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5751843452453613, "rewards/format_reward_step": 0.9921875, "step": 164 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.42134454753249884, "aux_distill/mean_u": 0.1822649157286872, "aux_distill/n_active_tok": 128.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5013222394220845, "calib/avg_num_step_conf": 2.01171875, "calib/ece": 0.3761023622047244, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.01310887512899897, "calib/mean_conf": 0.07460629921259843, "calib/mu_c": 0.08245098039215687, "calib/mu_w": 0.0693421052631579, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.024566929133858266, "calib/std_conf": 0.21081124361086834, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 558.0, "completions/max_terminated_length": 558.0, "completions/mean_length": 222.37109375, "completions/mean_terminated_length": 223.24314880371094, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.176, "grad_norm": 0.014216553419828415, "learning_rate": 9.722222222222224e-07, "loss": 0.0429, "num_tokens": 30611169.0, "reward": 1.1994494199752808, "reward_std": 0.18080687522888184, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.6098363399505615, "rewards/format_reward_step": 0.9921875, "step": 165 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3586527188308537, "aux_distill/mean_u": 0.18100545828037087, "aux_distill/n_active_tok": 132.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.517906683480454, "calib/avg_num_step_conf": 2.07421875, "calib/ece": 0.5074603174603175, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.03968253968253968, "calib/gap": -0.027437578814627994, "calib/mean_conf": 0.0692063492063492, "calib/mu_c": 0.05592307692307692, "calib/mu_w": 0.08336065573770492, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030396825396825394, "calib/std_conf": 0.2027474117604479, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2017.0, "completions/max_terminated_length": 2017.0, "completions/mean_length": 249.8984375, "completions/mean_terminated_length": 250.87844848632812, "completions/min_length": 0.0, "completions/min_terminated_length": 84.0, "epoch": 0.17706666666666668, "grad_norm": 0.01299862191081047, "learning_rate": 9.444444444444445e-07, "loss": 0.0718, "num_tokens": 30781327.0, "reward": 1.24409019947052, "reward_std": 0.21570822596549988, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.4881804585456848, "rewards/format_reward_step": 0.984375, "step": 166 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40623397240415215, "aux_distill/mean_u": 0.1911809320311116, "aux_distill/n_active_tok": 133.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5539028731248411, "calib/avg_num_step_conf": 2.0859375, "calib/ece": 0.5149603174603175, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.023809523809523808, "calib/gap": 0.016235697940503425, "calib/mean_conf": 0.04845238095238095, "calib/mu_c": 0.055797101449275355, "calib/mu_w": 0.03956140350877193, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007896825396825399, "calib/std_conf": 0.15272779536959882, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1564.0, "completions/max_terminated_length": 1564.0, "completions/mean_length": 241.88671875, "completions/mean_terminated_length": 242.83531188964844, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.17813333333333334, "grad_norm": 0.011331038549542427, "learning_rate": 9.166666666666666e-07, "loss": 0.0552, "num_tokens": 30948858.0, "reward": 1.271348237991333, "reward_std": 0.1793292909860611, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.480196475982666, "rewards/format_reward_step": 0.984375, "step": 167 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3805929427035153, "aux_distill/mean_u": 0.1850864661732162, "aux_distill/n_active_tok": 154.375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5907431551499348, "calib/avg_num_step_conf": 2.41015625, "calib/ece": 0.4818145161290322, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.024193548387096774, "calib/gap": 0.03916166883963494, "calib/mean_conf": 0.05044354838709678, "calib/mu_c": 0.06907692307692308, "calib/mu_w": 0.029915254237288132, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004032258064516128, "calib/std_conf": 0.15678538867999778, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2438.0, "completions/max_terminated_length": 2438.0, "completions/mean_length": 259.44921875, "completions/mean_terminated_length": 259.44921875, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.1792, "grad_norm": 0.014961393550038338, "learning_rate": 8.88888888888889e-07, "loss": 0.1209, "num_tokens": 31119949.0, "reward": 1.2485013008117676, "reward_std": 0.2524811327457428, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5048152208328247, "rewards/format_reward_step": 0.96875, "step": 168 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34547490254044533, "aux_distill/mean_u": 0.10757020926135626, "aux_distill/n_active_tok": 120.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5413037909836066, "calib/avg_num_step_conf": 1.8828125, "calib/ece": 0.46304000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.024, "calib/gap": 0.012348872950819656, "calib/mean_conf": 0.05712, "calib/mu_c": 0.06344262295081966, "calib/mu_w": 0.05109375000000001, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.016080000000000004, "calib/std_conf": 0.1751036995611458, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1253.0, "completions/max_terminated_length": 1253.0, "completions/mean_length": 230.125, "completions/mean_terminated_length": 230.125, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.18026666666666666, "grad_norm": 0.013571527786552906, "learning_rate": 8.611111111111112e-07, "loss": 0.0784, "num_tokens": 31283045.0, "reward": 1.2304667234420776, "reward_std": 0.20049837231636047, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.5273398160934448, "rewards/format_reward_step": 0.97265625, "step": 169 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37008119793608785, "aux_distill/mean_u": 0.14451895100963041, "aux_distill/n_active_tok": 137.09375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49952320406865863, "calib/avg_num_step_conf": 2.140625, "calib/ece": 0.459800796812749, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.04780876494023904, "calib/gap": 0.002264462809917364, "calib/mean_conf": 0.07709163346613547, "calib/mu_c": 0.07826446280991736, "calib/mu_w": 0.076, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02741035856573705, "calib/std_conf": 0.21855899759106026, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2515.0, "completions/max_terminated_length": 2515.0, "completions/mean_length": 244.1640625, "completions/mean_terminated_length": 244.1640625, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.18133333333333335, "grad_norm": 0.011340836994349957, "learning_rate": 8.333333333333333e-07, "loss": 0.0953, "num_tokens": 31449703.0, "reward": 1.2274580001831055, "reward_std": 0.21996477246284485, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5291347503662109, "rewards/format_reward_step": 0.98046875, "step": 170 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3528564595617354, "aux_distill/mean_u": 0.1634984313581802, "aux_distill/n_active_tok": 142.0625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5196801544082168, "calib/avg_num_step_conf": 2.21875, "calib/ece": 0.32158730158730164, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03571428571428571, "calib/gap": 0.039684290342593215, "calib/mean_conf": 0.06309523809523811, "calib/mu_c": 0.0887640449438202, "calib/mu_w": 0.04907975460122699, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01575396825396825, "calib/std_conf": 0.18882243395482923, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2720.0, "completions/max_terminated_length": 2720.0, "completions/mean_length": 255.046875, "completions/mean_terminated_length": 255.046875, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 0.1824, "grad_norm": 0.012555527500808239, "learning_rate": 8.055555555555557e-07, "loss": 0.0855, "num_tokens": 31621891.0, "reward": 1.1656486988067627, "reward_std": 0.20087698101997375, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6555160284042358, "rewards/format_reward_step": 0.98046875, "step": 171 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35162057261914015, "aux_distill/mean_u": 0.13648460286482703, "aux_distill/n_active_tok": 131.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4659004799191715, "calib/avg_num_step_conf": 2.0546875, "calib/ece": 0.5692549019607842, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0392156862745098, "calib/gap": -0.0409484718363223, "calib/mean_conf": 0.07380392156862745, "calib/mu_c": 0.05662162162162161, "calib/mu_w": 0.09757009345794392, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03133333333333333, "calib/std_conf": 0.20427595902504458, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 718.0, "completions/max_terminated_length": 718.0, "completions/mean_length": 224.22265625, "completions/mean_terminated_length": 225.1019744873047, "completions/min_length": 0.0, "completions/min_terminated_length": 94.0, "epoch": 0.18346666666666667, "grad_norm": 0.013728141784667969, "learning_rate": 7.777777777777779e-07, "loss": -0.0131, "num_tokens": 31782644.0, "reward": 1.2943949699401855, "reward_std": 0.20674556493759155, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.43644610047340393, "rewards/format_reward_step": 0.99609375, "step": 172 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35267816623672843, "aux_distill/mean_u": 0.1463806371924626, "aux_distill/n_active_tok": 162.25, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.47975628930817615, "calib/avg_num_step_conf": 2.53515625, "calib/ece": 0.3547839215686274, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.043137254901960784, "calib/gap": 0.02630636792452832, "calib/mean_conf": 0.07078470588235294, "calib/mu_c": 0.08718750000000001, "calib/mu_w": 0.060881132075471696, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.024549019607843142, "calib/std_conf": 0.20494122534161185, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2343.0, "completions/max_terminated_length": 2343.0, "completions/mean_length": 267.28515625, "completions/mean_terminated_length": 267.28515625, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.18453333333333333, "grad_norm": 0.012042745016515255, "learning_rate": 7.5e-07, "loss": 0.0252, "num_tokens": 31954229.0, "reward": 1.1889784336090088, "reward_std": 0.20391391217708588, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6357695460319519, "rewards/format_reward_step": 0.9921875, "step": 173 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.378698892891407, "aux_distill/mean_u": 0.2000926506861302, "aux_distill/n_active_tok": 140.96875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4848149113660062, "calib/avg_num_step_conf": 2.19921875, "calib/ece": 0.4685140562248996, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.04819277108433735, "calib/gap": -0.06858315954118874, "calib/mean_conf": 0.07469879518072288, "calib/mu_c": 0.03696428571428572, "calib/mu_w": 0.10554744525547446, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04670682730923695, "calib/std_conf": 0.2103945653553783, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1930.0, "completions/max_terminated_length": 1930.0, "completions/mean_length": 257.1015625, "completions/mean_terminated_length": 257.1015625, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 0.1856, "grad_norm": 0.012609269469976425, "learning_rate": 7.222222222222222e-07, "loss": 0.0738, "num_tokens": 32124279.0, "reward": 1.1833367347717285, "reward_std": 0.24715489149093628, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.5190171599388123, "rewards/format_reward_step": 0.97265625, "step": 174 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39681370416656137, "aux_distill/mean_u": 0.16005521341916332, "aux_distill/n_active_tok": 130.4375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4434292122971368, "calib/avg_num_step_conf": 2.03515625, "calib/ece": 0.40518072289156626, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.05220883534136546, "calib/gap": 0.0007111756168360073, "calib/mean_conf": 0.08261044176706828, "calib/mu_c": 0.08301886792452831, "calib/mu_w": 0.0823076923076923, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.03104417670682731, "calib/std_conf": 0.2206434840179149, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 896.0, "completions/max_terminated_length": 896.0, "completions/mean_length": 238.83203125, "completions/mean_terminated_length": 239.76864624023438, "completions/min_length": 0.0, "completions/min_terminated_length": 66.0, "epoch": 0.18666666666666668, "grad_norm": 0.013867022469639778, "learning_rate": 6.944444444444446e-07, "loss": 0.0267, "num_tokens": 32291244.0, "reward": 1.1870672702789307, "reward_std": 0.2362235188484192, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5733535289764404, "rewards/format_reward_step": 0.97265625, "step": 175 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.4129004646092653, "aux_distill/mean_u": 0.22087444549099958, "aux_distill/n_active_tok": 140.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.493684012066365, "calib/avg_num_step_conf": 2.19921875, "calib/ece": 0.4260869565217391, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.043478260869565216, "calib/gap": 0.02046631473102059, "calib/mean_conf": 0.07549407114624507, "calib/mu_c": 0.08649572649572648, "calib/mu_w": 0.06602941176470589, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019565217391304353, "calib/std_conf": 0.2103907830333878, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1410.0, "completions/max_terminated_length": 1410.0, "completions/mean_length": 247.1328125, "completions/mean_terminated_length": 247.1328125, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.18773333333333334, "grad_norm": 0.01126535888761282, "learning_rate": 6.666666666666667e-07, "loss": 0.0758, "num_tokens": 32458574.0, "reward": 1.23163902759552, "reward_std": 0.1989196240901947, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5609343647956848, "rewards/format_reward_step": 0.98828125, "step": 176 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3774943738244474, "aux_distill/mean_u": 0.14791538337202564, "aux_distill/n_active_tok": 142.5625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4859767891682786, "calib/avg_num_step_conf": 2.2421875, "calib/ece": 0.4026294820717132, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.055776892430278883, "calib/gap": 0.022279819471308845, "calib/mean_conf": 0.09075697211155379, "calib/mu_c": 0.10327272727272728, "calib/mu_w": 0.08099290780141843, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.027569721115537845, "calib/std_conf": 0.22860529995719947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2377.0, "completions/max_terminated_length": 2377.0, "completions/mean_length": 251.8984375, "completions/mean_terminated_length": 252.88629150390625, "completions/min_length": 0.0, "completions/min_terminated_length": 86.0, "epoch": 0.1888, "grad_norm": 0.012300615198910236, "learning_rate": 6.388888888888889e-07, "loss": 0.1106, "num_tokens": 32626892.0, "reward": 1.2100297212600708, "reward_std": 0.2131548970937729, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5802156329154968, "rewards/format_reward_step": 0.98046875, "step": 177 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3958655558526516, "aux_distill/mean_u": 0.18713042017993753, "aux_distill/n_active_tok": 149.875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48383068647540983, "calib/avg_num_step_conf": 2.33984375, "calib/ece": 0.4704800000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.016, "calib/gap": -0.014066342213114758, "calib/mean_conf": 0.04712, "calib/mu_c": 0.039918032786885244, "calib/mu_w": 0.053984375, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0148, "calib/std_conf": 0.13297708674805597, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2347.0, "completions/max_terminated_length": 2347.0, "completions/mean_length": 252.0234375, "completions/mean_terminated_length": 252.0234375, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.18986666666666666, "grad_norm": 0.011569799855351448, "learning_rate": 6.111111111111112e-07, "loss": 0.0964, "num_tokens": 32797482.0, "reward": 1.2241487503051758, "reward_std": 0.25436022877693176, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5186101198196411, "rewards/format_reward_step": 0.9765625, "step": 178 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39037828613072634, "aux_distill/mean_u": 0.17526136001380732, "aux_distill/n_active_tok": 165.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5905647163684982, "calib/avg_num_step_conf": 2.58203125, "calib/ece": 0.5101190476190478, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.03383268606353855, "calib/mean_conf": 0.04432539682539682, "calib/mu_c": 0.05949640287769784, "calib/mu_w": 0.025663716814159295, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0014285714285714284, "calib/std_conf": 0.127216621241411, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2311.0, "completions/max_terminated_length": 2311.0, "completions/mean_length": 266.0546875, "completions/mean_terminated_length": 266.0546875, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.19093333333333334, "grad_norm": 0.012259440496563911, "learning_rate": 5.833333333333334e-07, "loss": 0.0898, "num_tokens": 32971856.0, "reward": 1.2747502326965332, "reward_std": 0.17485995590686798, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.48700040578842163, "rewards/format_reward_step": 0.9765625, "step": 179 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3385851050261408, "aux_distill/mean_u": 0.14330181234511938, "aux_distill/n_active_tok": 140.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4555697823303457, "calib/avg_num_step_conf": 2.19140625, "calib/ece": 0.5428968253968254, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015873015873015872, "calib/gap": -0.023460947503201025, "calib/mean_conf": 0.04432539682539683, "calib/mu_c": 0.03408450704225352, "calib/mu_w": 0.057545454545454545, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011865079365079365, "calib/std_conf": 0.13172520274245528, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1013.0, "completions/max_terminated_length": 1013.0, "completions/mean_length": 259.75, "completions/mean_terminated_length": 261.7952880859375, "completions/min_length": 0.0, "completions/min_terminated_length": 77.0, "epoch": 0.192, "grad_norm": 0.01267837081104517, "learning_rate": 5.555555555555555e-07, "loss": 0.015, "num_tokens": 33142208.0, "reward": 1.2711178064346313, "reward_std": 0.2109297513961792, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.4484855532646179, "rewards/format_reward_step": 0.984375, "step": 180 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3581990972161293, "aux_distill/mean_u": 0.1333516353267837, "aux_distill/n_active_tok": 126.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5822570123939987, "calib/avg_num_step_conf": 1.9765625, "calib/ece": 0.388804780876494, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": 0.034264187866927594, "calib/mean_conf": 0.05549800796812749, "calib/mu_c": 0.07542857142857143, "calib/mu_w": 0.041164383561643834, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.012988047808764941, "calib/std_conf": 0.16675019784722173, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2409.0, "completions/max_terminated_length": 2409.0, "completions/mean_length": 236.81640625, "completions/mean_terminated_length": 237.7451171875, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.19306666666666666, "grad_norm": 0.012049591168761253, "learning_rate": 5.277777777777779e-07, "loss": 0.0878, "num_tokens": 33309097.0, "reward": 1.201343059539795, "reward_std": 0.20447835326194763, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.601905107498169, "rewards/format_reward_step": 0.98046875, "step": 181 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3256945216562599, "aux_distill/mean_u": 0.1309827357755496, "aux_distill/n_active_tok": 129.15625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4998382714452064, "calib/avg_num_step_conf": 2.015625, "calib/ece": 0.5071887550200803, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.024096385542168676, "calib/gap": -0.011099754172596711, "calib/mean_conf": 0.05755020080321286, "calib/mu_c": 0.052290076335877865, "calib/mu_w": 0.06338983050847458, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.019317269076305224, "calib/std_conf": 0.1616556608992178, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2266.0, "completions/max_terminated_length": 2266.0, "completions/mean_length": 245.984375, "completions/mean_terminated_length": 246.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 72.0, "epoch": 0.19413333333333332, "grad_norm": 0.011237970553338528, "learning_rate": 5.000000000000001e-07, "loss": 0.0491, "num_tokens": 33478229.0, "reward": 1.2409536838531494, "reward_std": 0.19657526910305023, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.4858136773109436, "rewards/format_reward_step": 0.97265625, "step": 182 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3604277758859098, "aux_distill/mean_u": 0.14891531079840287, "aux_distill/n_active_tok": 121.3125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48015417940876665, "calib/avg_num_step_conf": 1.89453125, "calib/ece": 0.4139525691699604, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.023715415019762844, "calib/gap": 0.0016055045871559495, "calib/mean_conf": 0.05569169960474308, "calib/mu_c": 0.05660550458715595, "calib/mu_w": 0.055, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.01940711462450593, "calib/std_conf": 0.16343646494401404, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1800.0, "completions/max_terminated_length": 1800.0, "completions/mean_length": 248.0859375, "completions/mean_terminated_length": 248.0859375, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.1952, "grad_norm": 0.011038307100534439, "learning_rate": 4.7222222222222226e-07, "loss": 0.058, "num_tokens": 33648419.0, "reward": 1.2105414867401123, "reward_std": 0.21297506988048553, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5812394618988037, "rewards/format_reward_step": 0.98828125, "step": 183 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.39351186295971274, "aux_distill/mean_u": 0.18142064296222152, "aux_distill/n_active_tok": 134.8125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.45229928861788615, "calib/avg_num_step_conf": 2.10546875, "calib/ece": 0.47968127490039836, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.02390438247011952, "calib/gap": -0.03226689532520326, "calib/mean_conf": 0.05442231075697211, "calib/mu_c": 0.037967479674796745, "calib/mu_w": 0.070234375, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02203187250996016, "calib/std_conf": 0.15731045300848923, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2236.0, "completions/max_terminated_length": 2236.0, "completions/mean_length": 261.609375, "completions/mean_terminated_length": 261.609375, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.19626666666666667, "grad_norm": 0.012052321806550026, "learning_rate": 4.444444444444445e-07, "loss": 0.0551, "num_tokens": 33820671.0, "reward": 1.225361704826355, "reward_std": 0.23105868697166443, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.50931715965271, "rewards/format_reward_step": 0.98046875, "step": 184 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3678031573072076, "aux_distill/mean_u": 0.17732553736026746, "aux_distill/n_active_tok": 148.75, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.4503010033444816, "calib/avg_num_step_conf": 2.3203125, "calib/ece": 0.43204081632653063, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.024489795918367346, "calib/gap": 0.015204013377926422, "calib/mean_conf": 0.06036734693877551, "calib/mu_c": 0.06843478260869565, "calib/mu_w": 0.05323076923076923, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011510204081632655, "calib/std_conf": 0.17128645226548408, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2368.0, "completions/max_terminated_length": 2368.0, "completions/mean_length": 262.9921875, "completions/mean_terminated_length": 264.0235595703125, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.19733333333333333, "grad_norm": 0.010781550779938698, "learning_rate": 4.1666666666666667e-07, "loss": 0.0398, "num_tokens": 33994917.0, "reward": 1.1965997219085693, "reward_std": 0.19675306975841522, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.537730872631073, "rewards/format_reward_step": 0.95703125, "step": 185 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40662080235779285, "aux_distill/mean_u": 0.20044403718500448, "aux_distill/n_active_tok": 142.5, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5070610924474306, "calib/avg_num_step_conf": 2.265625, "calib/ece": 0.42274509803921567, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.023529411764705882, "calib/gap": 0.005263157894736831, "calib/mean_conf": 0.055686274509803936, "calib/mu_c": 0.058596491228070174, "calib/mu_w": 0.053333333333333344, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.015686274509803925, "calib/std_conf": 0.16465120474316494, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 867.0, "completions/max_terminated_length": 867.0, "completions/mean_length": 239.94921875, "completions/mean_terminated_length": 240.8902130126953, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.1984, "grad_norm": 0.012303094379603863, "learning_rate": 3.8888888888888895e-07, "loss": 0.0567, "num_tokens": 34161384.0, "reward": 1.23370361328125, "reward_std": 0.18402235209941864, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5728757381439209, "rewards/format_reward_step": 0.99609375, "step": 186 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3461750391870737, "aux_distill/mean_u": 0.1459223011667055, "aux_distill/n_active_tok": 174.1875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48686660460167575, "calib/avg_num_step_conf": 2.71875, "calib/ece": 0.3996385542168674, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.060240963855421686, "calib/gap": -0.0027922596089905716, "calib/mean_conf": 0.08959839357429719, "calib/mu_c": 0.08796116504854369, "calib/mu_w": 0.09075342465753426, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03779116465863453, "calib/std_conf": 0.23633717523704523, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2082.0, "completions/max_terminated_length": 2082.0, "completions/mean_length": 264.1328125, "completions/mean_terminated_length": 264.1328125, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.19946666666666665, "grad_norm": 0.012116963975131512, "learning_rate": 3.611111111111111e-07, "loss": 0.1041, "num_tokens": 34330546.0, "reward": 1.1781506538391113, "reward_std": 0.2714693248271942, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.5789574384689331, "rewards/format_reward_step": 0.97265625, "step": 187 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36286410223692656, "aux_distill/mean_u": 0.18276469576506474, "aux_distill/n_active_tok": 138.34375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4541254554919313, "calib/avg_num_step_conf": 2.16015625, "calib/ece": 0.5208032128514055, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.012048192771084338, "calib/gap": -0.01723256116605934, "calib/mean_conf": 0.044658634538152615, "calib/mu_c": 0.03683823529411765, "calib/mu_w": 0.05407079646017699, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00963855421686747, "calib/std_conf": 0.1185148387492385, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2265.0, "completions/max_terminated_length": 2265.0, "completions/mean_length": 260.0546875, "completions/mean_terminated_length": 263.1383361816406, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.20053333333333334, "grad_norm": 0.011851982213556767, "learning_rate": 3.3333333333333335e-07, "loss": 0.0989, "num_tokens": 34501192.0, "reward": 1.2500507831573486, "reward_std": 0.21181350946426392, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.4649452865123749, "rewards/format_reward_step": 0.97265625, "step": 188 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.33696536626666784, "aux_distill/mean_u": 0.16132667425486644, "aux_distill/n_active_tok": 128.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5084586466165414, "calib/avg_num_step_conf": 2.00390625, "calib/ece": 0.4983070866141732, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.031496062992125984, "calib/gap": 0.0394874686716792, "calib/mean_conf": 0.06106299212598426, "calib/mu_c": 0.07878571428571429, "calib/mu_w": 0.03929824561403509, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004094488188976378, "calib/std_conf": 0.17610632293269302, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 536.0, "completions/max_terminated_length": 536.0, "completions/mean_length": 222.80078125, "completions/mean_terminated_length": 223.67453002929688, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.2016, "grad_norm": 0.014447653666138649, "learning_rate": 3.055555555555556e-07, "loss": 0.0249, "num_tokens": 34665997.0, "reward": 1.291475534439087, "reward_std": 0.20227526128292084, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.4970136880874634, "rewards/format_reward_step": 0.9921875, "step": 189 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34723477764055133, "aux_distill/mean_u": 0.15021027751099436, "aux_distill/n_active_tok": 120.40625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5632671422145107, "calib/avg_num_step_conf": 1.890625, "calib/ece": 0.45008000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": -0.006197545144913552, "calib/mean_conf": 0.06928, "calib/mu_c": 0.06598290598290599, "calib/mu_w": 0.07218045112781954, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02568, "calib/std_conf": 0.19074664243440825, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 878.0, "completions/max_terminated_length": 878.0, "completions/mean_length": 249.45703125, "completions/mean_terminated_length": 250.435302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.20266666666666666, "grad_norm": 0.010926991701126099, "learning_rate": 2.7777777777777776e-07, "loss": 0.0197, "num_tokens": 34835466.0, "reward": 1.2151250839233398, "reward_std": 0.23179908096790314, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5396250486373901, "rewards/format_reward_step": 0.9765625, "step": 190 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.31737090460956097, "aux_distill/mean_u": 0.10964079806256934, "aux_distill/n_active_tok": 147.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4878162782001745, "calib/avg_num_step_conf": 2.33984375, "calib/ece": 0.4236078431372549, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": -0.003901283809048986, "calib/mean_conf": 0.07686274509803921, "calib/mu_c": 0.07469026548672567, "calib/mu_w": 0.07859154929577465, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.028666666666666663, "calib/std_conf": 0.211171245406618, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 943.0, "completions/max_terminated_length": 943.0, "completions/mean_length": 231.07421875, "completions/mean_terminated_length": 231.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.20373333333333332, "grad_norm": 0.011647163890302181, "learning_rate": 2.5000000000000004e-07, "loss": 0.0283, "num_tokens": 34998789.0, "reward": 1.2246136665344238, "reward_std": 0.18209809064865112, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5703210830688477, "rewards/format_reward_step": 0.99609375, "step": 191 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.36972838593646884, "aux_distill/mean_u": 0.178276600960977, "aux_distill/n_active_tok": 134.90625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5226031746031746, "calib/avg_num_step_conf": 2.125, "calib/ece": 0.4762948207171314, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": -0.004618412698412691, "calib/mean_conf": 0.04960159362549802, "calib/mu_c": 0.04730158730158731, "calib/mu_w": 0.05192, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01195219123505976, "calib/std_conf": 0.14437611570583614, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 673.0, "completions/max_terminated_length": 673.0, "completions/mean_length": 228.26953125, "completions/mean_terminated_length": 229.1647186279297, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.2048, "grad_norm": 0.011698748916387558, "learning_rate": 2.2222222222222224e-07, "loss": 0.0397, "num_tokens": 35162202.0, "reward": 1.2384189367294312, "reward_std": 0.1519850641489029, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5119941830635071, "rewards/format_reward_step": 0.98046875, "step": 192 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3735968554392457, "aux_distill/mean_u": 0.1607234325833779, "aux_distill/n_active_tok": 113.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5134469696969697, "calib/avg_num_step_conf": 1.77734375, "calib/ece": 0.42417322834645665, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.051181102362204724, "calib/gap": -0.015446969696969695, "calib/mean_conf": 0.08039370078740159, "calib/mu_c": 0.07163636363636364, "calib/mu_w": 0.08708333333333333, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.035748031496062996, "calib/std_conf": 0.22381235663819363, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 768.0, "completions/max_terminated_length": 768.0, "completions/mean_length": 233.796875, "completions/mean_terminated_length": 233.796875, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.20586666666666667, "grad_norm": 0.012572907842695713, "learning_rate": 1.9444444444444447e-07, "loss": 0.0844, "num_tokens": 35327766.0, "reward": 1.2097558975219727, "reward_std": 0.2349000871181488, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5679492354393005, "rewards/format_reward_step": 0.9921875, "step": 193 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.37450212286785245, "aux_distill/mean_u": 0.18273606891361022, "aux_distill/n_active_tok": 112.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5285008134150919, "calib/avg_num_step_conf": 1.78515625, "calib/ece": 0.44482213438735185, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.02257977724940559, "calib/mean_conf": 0.06363636363636364, "calib/mu_c": 0.07532786885245903, "calib/mu_w": 0.05274809160305344, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013122529644268773, "calib/std_conf": 0.1755899659662866, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 774.0, "completions/max_terminated_length": 774.0, "completions/mean_length": 215.4296875, "completions/mean_terminated_length": 216.27452087402344, "completions/min_length": 0.0, "completions/min_terminated_length": 79.0, "epoch": 0.20693333333333333, "grad_norm": 0.013541937805712223, "learning_rate": 1.6666666666666668e-07, "loss": 0.0431, "num_tokens": 35488860.0, "reward": 1.2452245950698853, "reward_std": 0.2098076343536377, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5490429401397705, "rewards/format_reward_step": 0.98828125, "step": 194 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.40552747156471014, "aux_distill/mean_u": 0.20745253260927174, "aux_distill/n_active_tok": 114.34375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5586550836550837, "calib/avg_num_step_conf": 1.78515625, "calib/ece": 0.5103984063745021, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03187250996015936, "calib/gap": 0.03267245817245817, "calib/mean_conf": 0.06290836653386454, "calib/mu_c": 0.07735714285714285, "calib/mu_w": 0.04468468468468468, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.007768924302788845, "calib/std_conf": 0.1762624005123713, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2493.0, "completions/max_terminated_length": 2493.0, "completions/mean_length": 227.03515625, "completions/mean_terminated_length": 227.03515625, "completions/min_length": 71.0, "completions/min_terminated_length": 71.0, "epoch": 0.208, "grad_norm": 0.015685738995671272, "learning_rate": 1.3888888888888888e-07, "loss": 0.0641, "num_tokens": 35652965.0, "reward": 1.279039978981018, "reward_std": 0.19776293635368347, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.4838613271713257, "rewards/format_reward_step": 0.98046875, "step": 195 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3677436811849475, "aux_distill/mean_u": 0.14246220831369982, "aux_distill/n_active_tok": 106.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.45757123304715686, "calib/avg_num_step_conf": 1.66015625, "calib/ece": 0.41980392156862745, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.047058823529411764, "calib/gap": -0.01334826427771553, "calib/mean_conf": 0.08545098039215686, "calib/mu_c": 0.07807017543859651, "calib/mu_w": 0.09141843971631204, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02909803921568627, "calib/std_conf": 0.2133479450013375, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 591.0, "completions/max_terminated_length": 591.0, "completions/mean_length": 199.859375, "completions/mean_terminated_length": 200.6431427001953, "completions/min_length": 0.0, "completions/min_terminated_length": 73.0, "epoch": 0.20906666666666668, "grad_norm": 0.014952057972550392, "learning_rate": 1.1111111111111112e-07, "loss": 0.045, "num_tokens": 35806673.0, "reward": 1.2272090911865234, "reward_std": 0.20598194003105164, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5676996111869812, "rewards/format_reward_step": 0.99609375, "step": 196 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35766037181019783, "aux_distill/mean_u": 0.19740065525817932, "aux_distill/n_active_tok": 118.71875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5157745363342354, "calib/avg_num_step_conf": 1.87890625, "calib/ece": 0.4115725806451612, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.028225806451612902, "calib/gap": 0.016730908850900936, "calib/mean_conf": 0.06447580645161291, "calib/mu_c": 0.07385321100917432, "calib/mu_w": 0.05712230215827338, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.018266129032258063, "calib/std_conf": 0.17584968671538206, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1058.0, "completions/max_terminated_length": 1058.0, "completions/mean_length": 220.8984375, "completions/mean_terminated_length": 221.7647247314453, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.21013333333333334, "grad_norm": 0.013707943260669708, "learning_rate": 8.333333333333334e-08, "loss": 0.0255, "num_tokens": 35968279.0, "reward": 1.200000286102295, "reward_std": 0.2242618203163147, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.5718753933906555, "rewards/format_reward_step": 0.96875, "step": 197 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.35180373350158334, "aux_distill/mean_u": 0.12420010954716808, "aux_distill/n_active_tok": 126.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.51478895588702, "calib/avg_num_step_conf": 1.97265625, "calib/ece": 0.4947222222222223, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.01984126984126984, "calib/gap": 0.0274408124404951, "calib/mean_conf": 0.056309523809523816, "calib/mu_c": 0.06883211678832118, "calib/mu_w": 0.041391304347826084, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0036904761904761893, "calib/std_conf": 0.1475846254029491, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2630.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 231.1640625, "completions/mean_terminated_length": 231.1640625, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.2112, "grad_norm": 0.012614569626748562, "learning_rate": 5.555555555555556e-08, "loss": 0.0699, "num_tokens": 36132841.0, "reward": 1.2765079736709595, "reward_std": 0.21813374757766724, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.4983285069465637, "rewards/format_reward_step": 0.984375, "step": 198 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.34593457635492086, "aux_distill/mean_u": 0.17202861813479953, "aux_distill/n_active_tok": 136.84375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5353656949505465, "calib/avg_num_step_conf": 2.13671875, "calib/ece": 0.4144979919678715, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.03614457831325301, "calib/gap": 0.03869143675169182, "calib/mean_conf": 0.0689558232931727, "calib/mu_c": 0.09008849557522124, "calib/mu_w": 0.051397058823529414, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.014819277108433735, "calib/std_conf": 0.19453299566354348, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2070.0, "completions/max_terminated_length": 2070.0, "completions/mean_length": 247.671875, "completions/mean_terminated_length": 249.62203979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.21226666666666666, "grad_norm": 0.012673155404627323, "learning_rate": 2.777777777777778e-08, "loss": 0.0457, "num_tokens": 36300445.0, "reward": 1.212408423423767, "reward_std": 0.24468745291233063, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5693480372428894, "rewards/format_reward_step": 0.97265625, "step": 199 }, { "aux_distill/lambda": 0.10000000000000005, "aux_distill/loss": 0.3864405620843172, "aux_distill/mean_u": 0.16800184582972094, "aux_distill/n_active_tok": 121.5, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5787498461917067, "calib/avg_num_step_conf": 1.8984375, "calib/ece": 0.46976470588235286, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": 0.01872462163159838, "calib/mean_conf": 0.054313725490196085, "calib/mu_c": 0.06356589147286822, "calib/mu_w": 0.044841269841269835, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.009098039215686275, "calib/std_conf": 0.15270187412284936, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2597.0, "completions/max_terminated_length": 2597.0, "completions/mean_length": 238.53515625, "completions/mean_terminated_length": 238.53515625, "completions/min_length": 69.0, "completions/min_terminated_length": 69.0, "epoch": 0.21333333333333335, "grad_norm": 0.011946732178330421, "learning_rate": 0.0, "loss": 0.0595, "num_tokens": 36469558.0, "reward": 1.2669954299926758, "reward_std": 0.16499918699264526, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5300847887992859, "rewards/format_reward_step": 0.99609375, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.07998869574104901, "train_runtime": 11768.0831, "train_samples_per_second": 4.351, "train_steps_per_second": 0.017 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 36469558, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }