{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.06398312747478485, "learning_rate": 2.5000000000000004e-07, "loss": 0.0318, "num_tokens": 264685.0, "reward": 0.04124843701720238, "reward_std": 0.0838509351015091, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006252199877053499, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -6.368975378835882e-09, "aux_brier/mean_group_std": 0.07601744632372368, "aux_brier/mean_r": 0.4662125704897661, "aux_brier/n_active_tok": 30.0, "aux_brier/n_groups": 5.642857142857143, "aux_brier/n_step_records": 7.5, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.7, "calib/avg_num_step_conf": 0.4375, "calib/ece": 0.6792307692307693, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.6923076923076923, "calib/gap": 0.08233333333333326, "calib/mean_conf": 0.91, "calib/mu_c": 0.9733333333333333, "calib/mu_w": 0.891, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.6792307692307693, "calib/std_conf": 0.13067870169588874, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2852.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 633.98046875, "completions/mean_terminated_length": 702.5930786132812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.1778506636619568, "learning_rate": 7.5e-07, "loss": 0.0098, "num_tokens": 801022.0, "reward": 0.03784609213471413, "reward_std": 0.09251533448696136, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.018571875989437103, "rewards/format_reward_step": 0.04296875, "step": 3 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 0.0, "aux_brier/mean_group_std": 0.0, "aux_brier/mean_r": 0.451761796250204, "aux_brier/n_active_tok": 13.454545454545455, "aux_brier/n_groups": 3.3636363636363638, "aux_brier/n_step_records": 3.3636363636363638, "calib/answer_extract_rate": 0.046875, "calib/auroc": 0.035714285714285726, "calib/avg_num_step_conf": 0.1484375, "calib/ece": 0.7455555555555555, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.02734375, "calib/frac_conf_gt_0.9": 0.6666666666666666, "calib/gap": -0.062142857142857166, "calib/mean_conf": 0.9433333333333334, "calib/mu_c": 0.895, "calib/mu_w": 0.9571428571428572, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.05859375, "calib/nonempty_step_conf_rate": 0.04296875, "calib/pce": 0.7333333333333333, "calib/std_conf": 0.03496029493900503, "calib/step_conf_rate": 0.04296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 688.53515625, "completions/mean_terminated_length": 750.0637817382812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.004266666666666667, "grad_norm": 0.002101072808727622, "learning_rate": 1.0000000000000002e-06, "loss": 0.0278, "num_tokens": 1083455.0, "reward": 0.022981053218245506, "reward_std": 0.05210757628083229, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.00598671892657876, "rewards/format_reward_step": 0.02734375, "step": 4 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 3.725290298461914e-09, "aux_brier/mean_group_std": 0.017954056751551775, "aux_brier/mean_r": 0.5534369854965498, "aux_brier/n_active_tok": 19.666666666666668, "aux_brier/n_groups": 4.666666666666667, "aux_brier/n_step_records": 4.916666666666667, "calib/answer_extract_rate": 0.07421875, "calib/auroc": 0.46153846153846156, "calib/avg_num_step_conf": 0.23828125, "calib/ece": 0.7028571428571428, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.5714285714285714, "calib/gap": 0.15692307692307694, "calib/mean_conf": 0.7742857142857142, "calib/mu_c": 0.92, "calib/mu_w": 0.7630769230769231, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7028571428571428, "calib/std_conf": 0.286822849401465, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2920.0, "completions/max_terminated_length": 2920.0, "completions/mean_length": 602.31640625, "completions/mean_terminated_length": 656.140380859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.04466242715716362, "learning_rate": 1.25e-06, "loss": 0.0346, "num_tokens": 1344336.0, "reward": 0.032600585371255875, "reward_std": 0.07122698426246643, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.013214843347668648, "rewards/format_reward_step": 0.04296875, "step": 5 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": 9.912880985199182e-09, "aux_brier/mean_group_std": 0.07133042040183464, "aux_brier/mean_r": 0.4559556346984404, "aux_brier/n_active_tok": 27.555555555555557, "aux_brier/n_groups": 5.555555555555555, "aux_brier/n_step_records": 6.888888888888889, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.8928571428571428, "calib/avg_num_step_conf": 0.28125, "calib/ece": 0.8853333333333335, "calib/final_conf_rate": 0.05859375, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 0.9333333333333333, "calib/gap": 0.040714285714285814, "calib/mean_conf": 0.9520000000000001, "calib/mu_c": 0.99, "calib/mu_w": 0.9492857142857142, "calib/nonempty_final_conf_rate": 0.05859375, "calib/nonempty_reasoning_rate": 0.10546875, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.8853333333333335, "calib/std_conf": 0.03409789827345178, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3016.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 614.10546875, "completions/mean_terminated_length": 649.6322021484375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.06114912033081055, "learning_rate": 1.5e-06, "loss": 0.0262, "num_tokens": 1607499.0, "reward": 0.022258300334215164, "reward_std": 0.05493953078985214, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.003095703199505806, "rewards/format_reward_step": 0.03515625, "step": 6 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -7.389005690134292e-09, "aux_brier/mean_group_std": 0.025351182038254524, "aux_brier/mean_r": 0.5071582871900493, "aux_brier/n_active_tok": 23.636363636363637, "aux_brier/n_groups": 5.090909090909091, "aux_brier/n_step_records": 5.909090909090909, "calib/answer_extract_rate": 0.0625, "calib/auroc": 0.6785714285714286, "calib/avg_num_step_conf": 0.28125, "calib/ece": 0.7388888888888889, "calib/final_conf_rate": 0.03515625, "calib/format_rate": 0.03515625, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": 0.011428571428571566, "calib/mean_conf": 0.9611111111111111, "calib/mu_c": 0.97, "calib/mu_w": 0.9585714285714284, "calib/nonempty_final_conf_rate": 0.03515625, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.0546875, "calib/pce": 0.7388888888888889, "calib/std_conf": 0.023778816176702997, "calib/step_conf_rate": 0.0546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 738.15234375, "completions/mean_terminated_length": 814.512939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.007466666666666667, "grad_norm": 0.0676758885383606, "learning_rate": 1.75e-06, "loss": 0.0197, "num_tokens": 1903890.0, "reward": 0.027891892939805984, "reward_std": 0.07488921284675598, "rewards/accuracy_reward_step": 0.0078125, "rewards/final_brier_reward_step": 0.01000507827848196, "rewards/format_reward_step": 0.03515625, "step": 7 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -2.035648509017089e-08, "aux_brier/mean_group_std": 0.11330899293162673, "aux_brier/mean_r": 0.487877350407437, "aux_brier/n_active_tok": 28.333333333333332, "aux_brier/n_groups": 4.5, "aux_brier/n_step_records": 7.083333333333333, "calib/answer_extract_rate": 0.078125, "calib/auroc": 0.5208333333333334, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.7214285714285713, "calib/final_conf_rate": 0.0546875, "calib/format_rate": 0.046875, "calib/frac_conf_gt_0.9": 0.6428571428571429, "calib/gap": -0.11000000000000021, "calib/mean_conf": 0.8642857142857144, "calib/mu_c": 0.77, "calib/mu_w": 0.8800000000000002, "calib/nonempty_final_conf_rate": 0.0546875, "calib/nonempty_reasoning_rate": 0.109375, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.7214285714285713, "calib/std_conf": 0.16637798127475048, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.125, "completions/max_length": 3066.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 618.22265625, "completions/mean_terminated_length": 706.5402221679688, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.1465633362531662, "learning_rate": 2.0000000000000003e-06, "loss": -0.0036, "num_tokens": 2168667.0, "reward": 0.03858320042490959, "reward_std": 0.08830758184194565, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.013707812875509262, "rewards/format_reward_step": 0.046875, "step": 8 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": 7.641621409722651e-10, "aux_brier/mean_group_std": 0.027135649539449965, "aux_brier/mean_r": 0.4896537195325747, "aux_brier/n_active_tok": 21.333333333333332, "aux_brier/n_groups": 4.916666666666667, "aux_brier/n_step_records": 5.333333333333333, "calib/answer_extract_rate": 0.0703125, "calib/auroc": 0.5416666666666667, "calib/avg_num_step_conf": 0.2578125, "calib/ece": 0.8338461538461538, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.8461538461538461, "calib/gap": 0.05333333333333323, "calib/mean_conf": 0.9107692307692307, "calib/mu_c": 0.96, "calib/mu_w": 0.9066666666666667, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.07421875, "calib/nonempty_step_conf_rate": 0.05078125, "calib/pce": 0.8338461538461538, "calib/std_conf": 0.1640572854200882, "calib/step_conf_rate": 0.05078125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06640625, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 633.48046875, "completions/mean_terminated_length": 678.5397338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0096, "grad_norm": 0.004886317998170853, "learning_rate": 2.25e-06, "loss": -0.0044, "num_tokens": 2438374.0, "reward": 0.027958787977695465, "reward_std": 0.06483234465122223, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.010272655636072159, "rewards/format_reward_step": 0.04296875, "step": 9 }, { "aux_brier/lambda": 0.019999999999999997, "aux_brier/loss": -1.751129894792906e-08, "aux_brier/mean_group_std": 0.05809878932137776, "aux_brier/mean_r": 0.5582977201251549, "aux_brier/n_active_tok": 27.666666666666668, "aux_brier/n_groups": 5.666666666666667, "aux_brier/n_step_records": 6.916666666666667, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.40476190476190477, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.7652941176470589, "calib/final_conf_rate": 0.06640625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7647058823529411, "calib/gap": 0.0019047619047617426, "calib/mean_conf": 0.9417647058823528, "calib/mu_c": 0.9433333333333334, "calib/mu_w": 0.9414285714285716, "calib/nonempty_final_conf_rate": 0.06640625, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.7652941176470589, "calib/std_conf": 0.05802994096535774, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 3050.0, "completions/max_terminated_length": 3050.0, "completions/mean_length": 683.859375, "completions/mean_terminated_length": 751.3648071289062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.05447573959827423, "learning_rate": 2.5e-06, "loss": 0.0235, "num_tokens": 2720242.0, "reward": 0.03703994303941727, "reward_std": 0.08566759526729584, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.015347265638411045, "rewards/format_reward_step": 0.04296875, "step": 10 }, { "aux_brier/lambda": 0.02, "aux_brier/loss": -2.27734577862293e-08, "aux_brier/mean_group_std": 0.07176664606139693, "aux_brier/mean_r": 0.44533966665876523, "aux_brier/n_active_tok": 26.25, "aux_brier/n_groups": 4.4375, "aux_brier/n_step_records": 6.5625, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.7222222222222222, "calib/avg_num_step_conf": 0.421875, "calib/ece": 0.733181818181818, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.05078125, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/gap": 0.07027777777777755, "calib/mean_conf": 0.9149999999999999, "calib/mu_c": 0.9724999999999999, "calib/mu_w": 0.9022222222222224, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.171875, "calib/nonempty_step_conf_rate": 0.11328125, "calib/pce": 0.733181818181818, "calib/std_conf": 0.1773222541537915, "calib/step_conf_rate": 0.11328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2858.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 646.1953125, "completions/mean_terminated_length": 695.0672607421875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011733333333333333, "grad_norm": 0.11878404766321182, "learning_rate": 2.7500000000000004e-06, "loss": 0.0307, "num_tokens": 2990148.0, "reward": 0.046680860221385956, "reward_std": 0.09421106427907944, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.022660937160253525, "rewards/format_reward_step": 0.05078125, "step": 11 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": -7.479463937087871e-09, "aux_brier/mean_group_std": 0.06543663627295798, "aux_brier/mean_r": 0.5006823367259615, "aux_brier/n_active_tok": 39.0, "aux_brier/n_groups": 7.2, "aux_brier/n_step_records": 9.75, "calib/answer_extract_rate": 0.14453125, "calib/auroc": 0.662878787878788, "calib/avg_num_step_conf": 0.76171875, "calib/ece": 0.6532142857142859, "calib/final_conf_rate": 0.109375, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/gap": 0.11772727272727268, "calib/mean_conf": 0.8675, "calib/mu_c": 0.96, "calib/mu_w": 0.8422727272727273, "calib/nonempty_final_conf_rate": 0.109375, "calib/nonempty_reasoning_rate": 0.1796875, "calib/nonempty_step_conf_rate": 0.1328125, "calib/pce": 0.6532142857142859, "calib/std_conf": 0.22318514095444872, "calib/step_conf_rate": 0.1328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2950.0, "completions/max_terminated_length": 2950.0, "completions/mean_length": 609.60546875, "completions/mean_terminated_length": 669.7811279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.08994059264659882, "learning_rate": 3e-06, "loss": 0.0677, "num_tokens": 3250383.0, "reward": 0.07732617110013962, "reward_std": 0.1596646010875702, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.03586718440055847, "rewards/format_reward_step": 0.08984375, "step": 12 }, { "aux_brier/lambda": 0.020000000000000004, "aux_brier/loss": -2.3756011515274623e-07, "aux_brier/mean_group_std": 0.08902455108563517, "aux_brier/mean_r": 0.43889196804565545, "aux_brier/n_active_tok": 38.72, "aux_brier/n_groups": 5.68, "aux_brier/n_step_records": 9.68, "calib/answer_extract_rate": 0.2265625, "calib/auroc": 0.575091575091575, "calib/avg_num_step_conf": 0.984375, "calib/ece": 0.6584545454545455, "calib/final_conf_rate": 0.21484375, "calib/format_rate": 0.16015625, "calib/frac_conf_gt_0.9": 0.7636363636363637, "calib/gap": 0.055137362637362464, "calib/mean_conf": 0.8948181818181818, "calib/mu_c": 0.9369230769230769, "calib/mu_w": 0.8817857142857144, "calib/nonempty_final_conf_rate": 0.21484375, "calib/nonempty_reasoning_rate": 0.265625, "calib/nonempty_step_conf_rate": 0.2109375, "calib/pce": 0.6584545454545455, "calib/std_conf": 0.14796438034621595, "calib/step_conf_rate": 0.2109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3056.0, "completions/max_terminated_length": 3056.0, "completions/mean_length": 668.140625, "completions/mean_terminated_length": 706.7933349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.009733308106660843, "learning_rate": 3.2500000000000002e-06, "loss": 0.0836, "num_tokens": 3526019.0, "reward": 0.14610213041305542, "reward_std": 0.26760661602020264, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.06097102910280228, "rewards/format_reward_step": 0.16015625, "step": 13 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.9600009132611385e-08, "aux_brier/mean_group_std": 0.12765103385594037, "aux_brier/mean_r": 0.48026452309656165, "aux_brier/n_active_tok": 38.78260869565217, "aux_brier/n_groups": 5.521739130434782, "aux_brier/n_step_records": 9.695652173913043, "calib/answer_extract_rate": 0.2265625, "calib/auroc": 0.421875, "calib/avg_num_step_conf": 0.88671875, "calib/ece": 0.6974418604651161, "calib/final_conf_rate": 0.16796875, "calib/format_rate": 0.13671875, "calib/frac_conf_gt_0.9": 0.7674418604651163, "calib/gap": -0.07255681818181803, "calib/mean_conf": 0.8858139534883723, "calib/mu_c": 0.8318181818181819, "calib/mu_w": 0.9043749999999999, "calib/nonempty_final_conf_rate": 0.16796875, "calib/nonempty_reasoning_rate": 0.26953125, "calib/nonempty_step_conf_rate": 0.1953125, "calib/pce": 0.663720930232558, "calib/std_conf": 0.20493030645313196, "calib/step_conf_rate": 0.1953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 2823.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 545.953125, "completions/mean_terminated_length": 592.2203369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.07104963064193726, "learning_rate": 3.5e-06, "loss": 0.0563, "num_tokens": 3771183.0, "reward": 0.12767666578292847, "reward_std": 0.20409031212329865, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.049769140779972076, "rewards/format_reward_step": 0.13671875, "step": 14 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.8159775292073355e-08, "aux_brier/mean_group_std": 0.09736662535951819, "aux_brier/mean_r": 0.4821922055720829, "aux_brier/n_active_tok": 45.733333333333334, "aux_brier/n_groups": 5.766666666666667, "aux_brier/n_step_records": 11.433333333333334, "calib/answer_extract_rate": 0.3203125, "calib/auroc": 0.5548349056603774, "calib/avg_num_step_conf": 1.42578125, "calib/ece": 0.7242028985507246, "calib/final_conf_rate": 0.26953125, "calib/format_rate": 0.2421875, "calib/frac_conf_gt_0.9": 0.8115942028985508, "calib/gap": -0.04554245283018887, "calib/mean_conf": 0.9062318840579712, "calib/mu_c": 0.87125, "calib/mu_w": 0.9167924528301888, "calib/nonempty_final_conf_rate": 0.26953125, "calib/nonempty_reasoning_rate": 0.3671875, "calib/nonempty_step_conf_rate": 0.2890625, "calib/pce": 0.6992753623188406, "calib/std_conf": 0.169458655975008, "calib/step_conf_rate": 0.2890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2958.0, "completions/max_terminated_length": 2958.0, "completions/mean_length": 524.5, "completions/mean_terminated_length": 559.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 6.0, "epoch": 0.016, "grad_norm": 0.04970122501254082, "learning_rate": 3.7500000000000005e-06, "loss": 0.0481, "num_tokens": 4013335.0, "reward": 0.20352879166603088, "reward_std": 0.27594560384750366, "rewards/accuracy_reward_step": 0.0625, "rewards/final_brier_reward_step": 0.07974023371934891, "rewards/format_reward_step": 0.2421875, "step": 15 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.381153160231243e-08, "aux_brier/mean_group_std": 0.1013538566227542, "aux_brier/mean_r": 0.4497839628122416, "aux_brier/n_active_tok": 47.375, "aux_brier/n_groups": 6.78125, "aux_brier/n_step_records": 11.84375, "calib/answer_extract_rate": 0.30859375, "calib/auroc": 0.3856589147286822, "calib/avg_num_step_conf": 1.52734375, "calib/ece": 0.6227868852459015, "calib/final_conf_rate": 0.23828125, "calib/format_rate": 0.20703125, "calib/frac_conf_gt_0.9": 0.7704918032786885, "calib/gap": -0.005090439276485559, "calib/mean_conf": 0.9080327868852459, "calib/mu_c": 0.9044444444444445, "calib/mu_w": 0.90953488372093, "calib/nonempty_final_conf_rate": 0.23828125, "calib/nonempty_reasoning_rate": 0.3828125, "calib/nonempty_step_conf_rate": 0.296875, "calib/pce": 0.6178688524590162, "calib/std_conf": 0.17051339711926247, "calib/step_conf_rate": 0.296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 550.51171875, "completions/mean_terminated_length": 584.7759399414062, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.017066666666666667, "grad_norm": 0.0331612192094326, "learning_rate": 4.000000000000001e-06, "loss": 0.12, "num_tokens": 4263114.0, "reward": 0.19834326207637787, "reward_std": 0.3126984238624573, "rewards/accuracy_reward_step": 0.07421875, "rewards/final_brier_reward_step": 0.08243554830551147, "rewards/format_reward_step": 0.20703125, "step": 16 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.6209495448958897e-08, "aux_brier/mean_group_std": 0.14503769411105435, "aux_brier/mean_r": 0.5303295482441595, "aux_brier/n_active_tok": 46.875, "aux_brier/n_groups": 6.21875, "aux_brier/n_step_records": 11.71875, "calib/answer_extract_rate": 0.36328125, "calib/auroc": 0.6134408602150537, "calib/avg_num_step_conf": 1.546875, "calib/ece": 0.6980246753246753, "calib/final_conf_rate": 0.30078125, "calib/format_rate": 0.234375, "calib/frac_conf_gt_0.9": 0.6753246753246753, "calib/gap": -0.007184408602150527, "calib/mean_conf": 0.8671181818181819, "calib/mu_c": 0.8613333333333333, "calib/mu_w": 0.8685177419354838, "calib/nonempty_final_conf_rate": 0.30078125, "calib/nonempty_reasoning_rate": 0.4140625, "calib/nonempty_step_conf_rate": 0.31640625, "calib/pce": 0.6851688311688312, "calib/std_conf": 0.2290769885484103, "calib/step_conf_rate": 0.31640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 496.265625, "completions/mean_terminated_length": 529.3500366210938, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.018133333333333335, "grad_norm": 0.021505752578377724, "learning_rate": 4.25e-06, "loss": 0.1227, "num_tokens": 4493686.0, "reward": 0.1966833919286728, "reward_std": 0.2693749666213989, "rewards/accuracy_reward_step": 0.05859375, "rewards/final_brier_reward_step": 0.08360857516527176, "rewards/format_reward_step": 0.234375, "step": 17 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.319586366407297e-08, "aux_brier/mean_group_std": 0.1386110328438892, "aux_brier/mean_r": 0.4721913663798735, "aux_brier/n_active_tok": 47.87096774193548, "aux_brier/n_groups": 5.741935483870968, "aux_brier/n_step_records": 11.96774193548387, "calib/answer_extract_rate": 0.3359375, "calib/auroc": 0.5151072124756335, "calib/avg_num_step_conf": 1.5, "calib/ece": 0.6594711111111109, "calib/final_conf_rate": 0.29296875, "calib/format_rate": 0.26171875, "calib/frac_conf_gt_0.9": 0.7733333333333333, "calib/gap": 0.018959064327485398, "calib/mean_conf": 0.8739244444444444, "calib/mu_c": 0.8883333333333333, "calib/mu_w": 0.8693742690058479, "calib/nonempty_final_conf_rate": 0.29296875, "calib/nonempty_reasoning_rate": 0.39453125, "calib/nonempty_step_conf_rate": 0.33203125, "calib/pce": 0.6466977777777776, "calib/std_conf": 0.22668512717853948, "calib/step_conf_rate": 0.33203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 3072.0, "completions/max_terminated_length": 3072.0, "completions/mean_length": 571.9375, "completions/mean_terminated_length": 595.1869506835938, "completions/min_length": 0.0, "completions/min_terminated_length": 7.0, "epoch": 0.0192, "grad_norm": 0.010450121015310287, "learning_rate": 4.5e-06, "loss": 0.1138, "num_tokens": 4750822.0, "reward": 0.22563646733760834, "reward_std": 0.3434634804725647, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.09785837680101395, "rewards/format_reward_step": 0.26171875, "step": 18 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1184898590643755e-08, "aux_brier/mean_group_std": 0.1945929986640071, "aux_brier/mean_r": 0.5057144061160669, "aux_brier/n_active_tok": 105.25, "aux_brier/n_groups": 8.15625, "aux_brier/n_step_records": 26.3125, "calib/answer_extract_rate": 0.65234375, "calib/auroc": 0.5066051844466599, "calib/avg_num_step_conf": 3.33203125, "calib/ece": 0.6896710526315789, "calib/final_conf_rate": 0.59375, "calib/format_rate": 0.52734375, "calib/frac_conf_gt_0.9": 0.756578947368421, "calib/gap": 0.01999501495513456, "calib/mean_conf": 0.9050657894736842, "calib/mu_c": 0.9205882352941176, "calib/mu_w": 0.900593220338983, "calib/nonempty_final_conf_rate": 0.59375, "calib/nonempty_reasoning_rate": 0.75, "calib/nonempty_step_conf_rate": 0.66796875, "calib/pce": 0.6855263157894737, "calib/std_conf": 0.1711109085948824, "calib/step_conf_rate": 0.66796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2632.0, "completions/max_terminated_length": 2632.0, "completions/mean_length": 401.12890625, "completions/mean_terminated_length": 410.7560119628906, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.020266666666666665, "grad_norm": 0.06618562340736389, "learning_rate": 4.75e-06, "loss": 0.0858, "num_tokens": 4958271.0, "reward": 0.44414716958999634, "reward_std": 0.4753453731536865, "rewards/accuracy_reward_step": 0.13671875, "rewards/final_brier_reward_step": 0.17502617835998535, "rewards/format_reward_step": 0.52734375, "step": 19 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.151205114736543e-08, "aux_brier/mean_group_std": 0.18822933652363377, "aux_brier/mean_r": 0.46914548827895425, "aux_brier/n_active_tok": 123.625, "aux_brier/n_groups": 8.25, "aux_brier/n_step_records": 30.90625, "calib/answer_extract_rate": 0.78515625, "calib/auroc": 0.5387919463087247, "calib/avg_num_step_conf": 3.94140625, "calib/ece": 0.6388944723618092, "calib/final_conf_rate": 0.77734375, "calib/format_rate": 0.68359375, "calib/frac_conf_gt_0.9": 0.7437185929648241, "calib/gap": 0.024506711409396087, "calib/mean_conf": 0.8901507537688441, "calib/mu_c": 0.9085, "calib/mu_w": 0.8839932885906039, "calib/nonempty_final_conf_rate": 0.77734375, "calib/nonempty_reasoning_rate": 0.8515625, "calib/nonempty_step_conf_rate": 0.765625, "calib/pce": 0.6388944723618092, "calib/std_conf": 0.18575184014572316, "calib/step_conf_rate": 0.765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 353.3125, "completions/mean_terminated_length": 356.094482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.021333333333333333, "grad_norm": 0.05135459825396538, "learning_rate": 5e-06, "loss": 0.1086, "num_tokens": 5153591.0, "reward": 0.6015430092811584, "reward_std": 0.4995270371437073, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.25773465633392334, "rewards/format_reward_step": 0.68359375, "step": 20 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.4417219496308817e-09, "aux_brier/mean_group_std": 0.21602263166428068, "aux_brier/mean_r": 0.4703219827572299, "aux_brier/n_active_tok": 138.75, "aux_brier/n_groups": 8.3125, "aux_brier/n_step_records": 34.6875, "calib/answer_extract_rate": 0.87890625, "calib/auroc": 0.5354761904761904, "calib/avg_num_step_conf": 4.375, "calib/ece": 0.6650458715596331, "calib/final_conf_rate": 0.8515625, "calib/format_rate": 0.7734375, "calib/frac_conf_gt_0.9": 0.7385321100917431, "calib/gap": 0.04074999999999984, "calib/mean_conf": 0.8855963302752293, "calib/mu_c": 0.9169999999999999, "calib/mu_w": 0.8762500000000001, "calib/nonempty_final_conf_rate": 0.8515625, "calib/nonempty_reasoning_rate": 0.93359375, "calib/nonempty_step_conf_rate": 0.86328125, "calib/pce": 0.6606422018348624, "calib/std_conf": 0.1948673093246116, "calib/step_conf_rate": 0.86328125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3053.0, "completions/max_terminated_length": 3053.0, "completions/mean_length": 324.41796875, "completions/mean_terminated_length": 326.9724426269531, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.0224, "grad_norm": 0.021180493757128716, "learning_rate": 4.9722222222222224e-06, "loss": 0.1107, "num_tokens": 5339602.0, "reward": 0.6721444129943848, "reward_std": 0.4640282988548279, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.2823277413845062, "rewards/format_reward_step": 0.7734375, "step": 21 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.936686088807818e-07, "aux_brier/mean_group_std": 0.20006337605881283, "aux_brier/mean_r": 0.49430907186332984, "aux_brier/n_active_tok": 153.125, "aux_brier/n_groups": 9.0, "aux_brier/n_step_records": 38.28125, "calib/answer_extract_rate": 0.890625, "calib/auroc": 0.5538777606574217, "calib/avg_num_step_conf": 4.8671875, "calib/ece": 0.6602410714285715, "calib/final_conf_rate": 0.875, "calib/format_rate": 0.81640625, "calib/frac_conf_gt_0.9": 0.7767857142857143, "calib/gap": 0.027565896250642208, "calib/mean_conf": 0.9178303571428571, "calib/mu_c": 0.938135593220339, "calib/mu_w": 0.9105696969696968, "calib/nonempty_final_conf_rate": 0.875, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.921875, "calib/pce": 0.6573392857142857, "calib/std_conf": 0.13423080392108594, "calib/step_conf_rate": 0.921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1807.0, "completions/max_terminated_length": 1807.0, "completions/mean_length": 316.33203125, "completions/mean_terminated_length": 317.57257080078125, "completions/min_length": 0.0, "completions/min_terminated_length": 43.0, "epoch": 0.023466666666666667, "grad_norm": 0.029975971207022667, "learning_rate": 4.944444444444445e-06, "loss": 0.0858, "num_tokens": 5522399.0, "reward": 0.7157385349273682, "reward_std": 0.5147027969360352, "rewards/accuracy_reward_step": 0.23046875, "rewards/final_brier_reward_step": 0.30826669931411743, "rewards/format_reward_step": 0.81640625, "step": 22 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7249143106912168e-08, "aux_brier/mean_group_std": 0.20721674775241733, "aux_brier/mean_r": 0.46474896424645457, "aux_brier/n_active_tok": 153.375, "aux_brier/n_groups": 8.78125, "aux_brier/n_step_records": 38.34375, "calib/answer_extract_rate": 0.91015625, "calib/auroc": 0.4869805842859736, "calib/avg_num_step_conf": 4.85546875, "calib/ece": 0.6578798283261801, "calib/final_conf_rate": 0.91015625, "calib/format_rate": 0.85546875, "calib/frac_conf_gt_0.9": 0.8197424892703863, "calib/gap": -0.017321538740700304, "calib/mean_conf": 0.9185665236051502, "calib/mu_c": 0.9061515151515153, "calib/mu_w": 0.9234730538922156, "calib/nonempty_final_conf_rate": 0.91015625, "calib/nonempty_reasoning_rate": 0.98046875, "calib/nonempty_step_conf_rate": 0.94921875, "calib/pce": 0.6465922746781114, "calib/std_conf": 0.14641637990410666, "calib/step_conf_rate": 0.94921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2928.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 306.109375, "completions/mean_terminated_length": 307.309814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.024533333333333334, "grad_norm": 0.05430552363395691, "learning_rate": 4.9166666666666665e-06, "loss": 0.1045, "num_tokens": 5704699.0, "reward": 0.771274745464325, "reward_std": 0.47520768642425537, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.31166166067123413, "rewards/format_reward_step": 0.85546875, "step": 23 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.0086297331830565e-09, "aux_brier/mean_group_std": 0.22335746791734964, "aux_brier/mean_r": 0.47170807526595093, "aux_brier/n_active_tok": 185.75, "aux_brier/n_groups": 10.71875, "aux_brier/n_step_records": 46.4375, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5161184210526316, "calib/avg_num_step_conf": 5.8671875, "calib/ece": 0.7104201680672269, "calib/final_conf_rate": 0.9296875, "calib/format_rate": 0.91015625, "calib/frac_conf_gt_0.9": 0.7899159663865546, "calib/gap": 0.04412499999999986, "calib/mean_conf": 0.9078991596638656, "calib/mu_c": 0.9431249999999999, "calib/mu_w": 0.899, "calib/nonempty_final_conf_rate": 0.9296875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.7083193277310924, "calib/std_conf": 0.185018163766868, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2320.0, "completions/max_terminated_length": 2320.0, "completions/mean_length": 314.80078125, "completions/mean_terminated_length": 316.0353088378906, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.0256, "grad_norm": 0.14085201919078827, "learning_rate": 4.888888888888889e-06, "loss": 0.0463, "num_tokens": 5889800.0, "reward": 0.7152005434036255, "reward_std": 0.4024164080619812, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.2904898524284363, "rewards/format_reward_step": 0.91015625, "step": 24 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.49456521947711e-09, "aux_brier/mean_group_std": 0.19012552279245282, "aux_brier/mean_r": 0.46267547298595846, "aux_brier/n_active_tok": 171.625, "aux_brier/n_groups": 9.3125, "aux_brier/n_step_records": 42.90625, "calib/answer_extract_rate": 0.9375, "calib/auroc": 0.44689075630252095, "calib/avg_num_step_conf": 5.44921875, "calib/ece": 0.6383794238683127, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.90625, "calib/frac_conf_gt_0.9": 0.757201646090535, "calib/gap": -0.016868100840335853, "calib/mean_conf": 0.9033242798353909, "calib/mu_c": 0.8911764705882355, "calib/mu_w": 0.9080445714285713, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.6309341563786007, "calib/std_conf": 0.1727649756689619, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1508.0, "completions/max_terminated_length": 1508.0, "completions/mean_length": 281.765625, "completions/mean_terminated_length": 282.87060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.02666666666666667, "grad_norm": 0.6250544190406799, "learning_rate": 4.861111111111111e-06, "loss": 0.0402, "num_tokens": 6065156.0, "reward": 0.8051015138626099, "reward_std": 0.4535765051841736, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.34540611505508423, "rewards/format_reward_step": 0.90625, "step": 25 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.908535933244144e-09, "aux_brier/mean_group_std": 0.1838517052863827, "aux_brier/mean_r": 0.4691080054972976, "aux_brier/n_active_tok": 194.0, "aux_brier/n_groups": 10.5, "aux_brier/n_step_records": 48.5, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5859812240476621, "calib/avg_num_step_conf": 6.11328125, "calib/ece": 0.6937871485943776, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.8433734939759037, "calib/gap": 0.03806038996208705, "calib/mean_conf": 0.9267188755020079, "calib/mu_c": 0.9559137931034484, "calib/mu_w": 0.9178534031413613, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6937871485943776, "calib/std_conf": 0.12898255417795712, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1806.0, "completions/max_terminated_length": 1806.0, "completions/mean_length": 302.578125, "completions/mean_terminated_length": 303.76470947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.027733333333333332, "grad_norm": 0.06872251629829407, "learning_rate": 4.833333333333333e-06, "loss": -0.0174, "num_tokens": 6247856.0, "reward": 0.7908028960227966, "reward_std": 0.3393092751502991, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.32727402448654175, "rewards/format_reward_step": 0.96484375, "step": 26 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0543161330089923e-08, "aux_brier/mean_group_std": 0.18653052802370929, "aux_brier/mean_r": 0.450081871628691, "aux_brier/n_active_tok": 183.25, "aux_brier/n_groups": 9.5625, "aux_brier/n_step_records": 45.8125, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.4747953869047619, "calib/avg_num_step_conf": 5.859375, "calib/ece": 0.6900924731182796, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.7701612903225806, "calib/gap": 0.002150347222222182, "calib/mean_conf": 0.9081602150537635, "calib/mu_c": 0.9098249999999999, "calib/mu_w": 0.9076746527777777, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.6862231182795699, "calib/std_conf": 0.15941406184053702, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 739.0, "completions/max_terminated_length": 739.0, "completions/mean_length": 281.00390625, "completions/mean_terminated_length": 282.10589599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.0288, "grad_norm": 0.35110238194465637, "learning_rate": 4.805555555555556e-06, "loss": -0.0068, "num_tokens": 6425009.0, "reward": 0.7702633738517761, "reward_std": 0.4074326753616333, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.3154284358024597, "rewards/format_reward_step": 0.9375, "step": 27 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.461542828429252e-08, "aux_brier/mean_group_std": 0.17219409015732765, "aux_brier/mean_r": 0.40732684840313854, "aux_brier/n_active_tok": 187.125, "aux_brier/n_groups": 10.25, "aux_brier/n_step_records": 46.78125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5540783461912965, "calib/avg_num_step_conf": 5.91015625, "calib/ece": 0.6253366935483871, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.014456108156692182, "calib/mean_conf": 0.9371108870967743, "calib/mu_c": 0.9469620253164556, "calib/mu_w": 0.9325059171597634, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.6219495967741935, "calib/std_conf": 0.12946590347936862, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2462.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 320.33203125, "completions/mean_terminated_length": 320.33203125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.029866666666666666, "grad_norm": 0.16183070838451385, "learning_rate": 4.777777777777778e-06, "loss": 0.0327, "num_tokens": 6613958.0, "reward": 0.8812159299850464, "reward_std": 0.43845492601394653, "rewards/accuracy_reward_step": 0.30859375, "rewards/final_brier_reward_step": 0.37642624974250793, "rewards/format_reward_step": 0.95703125, "step": 28 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.8970811097317473e-09, "aux_brier/mean_group_std": 0.1829825708600916, "aux_brier/mean_r": 0.43041540045039833, "aux_brier/n_active_tok": 199.0, "aux_brier/n_groups": 11.21875, "aux_brier/n_step_records": 49.75, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5139797739440808, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.7700004065040651, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.8495934959349594, "calib/gap": -0.0006834146341464509, "calib/mean_conf": 0.9303256097560977, "calib/mu_c": 0.9297560975609755, "calib/mu_w": 0.930439512195122, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.766829674796748, "calib/std_conf": 0.13811433240502272, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2285.0, "completions/max_terminated_length": 2285.0, "completions/mean_length": 339.1796875, "completions/mean_terminated_length": 339.1796875, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.030933333333333334, "grad_norm": 0.574836790561676, "learning_rate": 4.75e-06, "loss": -0.0201, "num_tokens": 6807916.0, "reward": 0.7120146751403809, "reward_std": 0.35177648067474365, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.24649646878242493, "rewards/format_reward_step": 0.94921875, "step": 29 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.1687137904957687e-08, "aux_brier/mean_group_std": 0.18021648479185076, "aux_brier/mean_r": 0.43000765493765214, "aux_brier/n_active_tok": 208.875, "aux_brier/n_groups": 11.53125, "aux_brier/n_step_records": 52.21875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49403353057199206, "calib/avg_num_step_conf": 6.56640625, "calib/ece": 0.7050647773279352, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.94140625, "calib/frac_conf_gt_0.9": 0.7651821862348178, "calib/gap": 0.0216615384615384, "calib/mean_conf": 0.9155910931174088, "calib/mu_c": 0.9326923076923077, "calib/mu_w": 0.9110307692307693, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.7050647773279352, "calib/std_conf": 0.14953452339125448, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1408.0, "completions/max_terminated_length": 1408.0, "completions/mean_length": 340.52734375, "completions/mean_terminated_length": 340.52734375, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.032, "grad_norm": 0.1962406188249588, "learning_rate": 4.722222222222222e-06, "loss": 0.0055, "num_tokens": 7002075.0, "reward": 0.7536724805831909, "reward_std": 0.3878949284553528, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.3037523627281189, "rewards/format_reward_step": 0.94140625, "step": 30 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.299048018245436e-09, "aux_brier/mean_group_std": 0.18572330139506193, "aux_brier/mean_r": 0.4352240946835064, "aux_brier/n_active_tok": 198.875, "aux_brier/n_groups": 10.5625, "aux_brier/n_step_records": 49.71875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.414157196969697, "calib/avg_num_step_conf": 6.24609375, "calib/ece": 0.7004372469635628, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.7651821862348178, "calib/gap": 0.003952651515151673, "calib/mean_conf": 0.9231093117408907, "calib/mu_c": 0.9261818181818184, "calib/mu_w": 0.9222291666666668, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.7004372469635628, "calib/std_conf": 0.10772197773987564, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1905.0, "completions/max_terminated_length": 1905.0, "completions/mean_length": 307.46484375, "completions/mean_terminated_length": 307.46484375, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.03306666666666667, "grad_norm": 0.22495311498641968, "learning_rate": 4.694444444444445e-06, "loss": -0.0077, "num_tokens": 7186698.0, "reward": 0.7656251192092896, "reward_std": 0.4019715487957001, "rewards/accuracy_reward_step": 0.21484375, "rewards/final_brier_reward_step": 0.31250035762786865, "rewards/format_reward_step": 0.9453125, "step": 31 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.179986985899717e-08, "aux_brier/mean_group_std": 0.21362233476934434, "aux_brier/mean_r": 0.48963481535103387, "aux_brier/n_active_tok": 203.75, "aux_brier/n_groups": 10.09375, "aux_brier/n_step_records": 50.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5580796876386548, "calib/avg_num_step_conf": 6.40625, "calib/ece": 0.679844, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.748, "calib/gap": 0.036706096370574315, "calib/mean_conf": 0.912804, "calib/mu_c": 0.9408474576271186, "calib/mu_w": 0.9041413612565443, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6783239999999999, "calib/std_conf": 0.13722150554486712, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 771.0, "completions/max_terminated_length": 771.0, "completions/mean_length": 318.7421875, "completions/mean_terminated_length": 319.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 130.0, "epoch": 0.034133333333333335, "grad_norm": 0.8750050663948059, "learning_rate": 4.666666666666667e-06, "loss": 0.0382, "num_tokens": 7375000.0, "reward": 0.8055583834648132, "reward_std": 0.3464581370353699, "rewards/accuracy_reward_step": 0.234375, "rewards/final_brier_reward_step": 0.34723353385925293, "rewards/format_reward_step": 0.96875, "step": 32 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0750918350010963e-08, "aux_brier/mean_group_std": 0.19903781719884378, "aux_brier/mean_r": 0.4613904062627435, "aux_brier/n_active_tok": 205.125, "aux_brier/n_groups": 9.875, "aux_brier/n_step_records": 51.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4440632742519536, "calib/avg_num_step_conf": 6.4921875, "calib/ece": 0.7004139442231077, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.7848605577689243, "calib/gap": 0.015710243948923397, "calib/mean_conf": 0.9115693227091634, "calib/mu_c": 0.9239622641509434, "calib/mu_w": 0.90825202020202, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.7004139442231077, "calib/std_conf": 0.14730421747254435, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1648.0, "completions/max_terminated_length": 1648.0, "completions/mean_length": 343.94921875, "completions/mean_terminated_length": 345.2980651855469, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.0352, "grad_norm": 0.2861870229244232, "learning_rate": 4.638888888888889e-06, "loss": 0.02, "num_tokens": 7569923.0, "reward": 0.7713510990142822, "reward_std": 0.35344183444976807, "rewards/accuracy_reward_step": 0.20703125, "rewards/final_brier_reward_step": 0.31977948546409607, "rewards/format_reward_step": 0.96875, "step": 33 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.782313591637614e-09, "aux_brier/mean_group_std": 0.2151776268609169, "aux_brier/mean_r": 0.5267375078615244, "aux_brier/n_active_tok": 209.25, "aux_brier/n_groups": 12.0625, "aux_brier/n_step_records": 52.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.43960813492063494, "calib/avg_num_step_conf": 6.546875, "calib/ece": 0.6540553359683795, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.6798418972332015, "calib/gap": -0.01951669973544956, "calib/mean_conf": 0.898798418972332, "calib/mu_c": 0.88421875, "calib/mu_w": 0.9037354497354496, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6499446640316205, "calib/std_conf": 0.12952407134115607, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1697.0, "completions/max_terminated_length": 1697.0, "completions/mean_length": 314.56640625, "completions/mean_terminated_length": 314.56640625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.03626666666666667, "grad_norm": 0.23406533896923065, "learning_rate": 4.611111111111112e-06, "loss": 0.0007, "num_tokens": 7755564.0, "reward": 0.8272361159324646, "reward_std": 0.4333822727203369, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.3636319637298584, "rewards/format_reward_step": 0.97265625, "step": 34 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.877957150371202e-09, "aux_brier/mean_group_std": 0.19853810602911778, "aux_brier/mean_r": 0.5004648630925564, "aux_brier/n_active_tok": 221.875, "aux_brier/n_groups": 12.90625, "aux_brier/n_step_records": 55.46875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5563067150635209, "calib/avg_num_step_conf": 7.0, "calib/ece": 0.6686693548387098, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.6895161290322581, "calib/gap": 0.01738838475499105, "calib/mean_conf": 0.9025403225806451, "calib/mu_c": 0.9158620689655173, "calib/mu_w": 0.8984736842105262, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6686693548387098, "calib/std_conf": 0.12953545405780015, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2087.0, "completions/max_terminated_length": 2087.0, "completions/mean_length": 384.77734375, "completions/mean_terminated_length": 384.77734375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 0.037333333333333336, "grad_norm": 0.1568983644247055, "learning_rate": 4.583333333333333e-06, "loss": 0.1353, "num_tokens": 7963323.0, "reward": 0.7988897562026978, "reward_std": 0.3805563449859619, "rewards/accuracy_reward_step": 0.2265625, "rewards/final_brier_reward_step": 0.35180899500846863, "rewards/format_reward_step": 0.96875, "step": 35 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.21257063764868e-08, "aux_brier/mean_group_std": 0.19363746661753417, "aux_brier/mean_r": 0.536797599648182, "aux_brier/n_active_tok": 204.625, "aux_brier/n_groups": 11.0, "aux_brier/n_step_records": 51.15625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47544031311154594, "calib/avg_num_step_conf": 6.46484375, "calib/ece": 0.48806772908366536, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.5896414342629482, "calib/gap": -0.019987279843444172, "calib/mean_conf": 0.8727689243027889, "calib/mu_c": 0.8611428571428573, "calib/mu_w": 0.8811301369863015, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.47125498007968125, "calib/std_conf": 0.15654894474901493, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2635.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 335.75390625, "completions/mean_terminated_length": 335.75390625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.0384, "grad_norm": 0.013663742691278458, "learning_rate": 4.555555555555556e-06, "loss": 0.051, "num_tokens": 8151988.0, "reward": 1.0257827043533325, "reward_std": 0.3903416395187378, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.5015686750411987, "rewards/format_reward_step": 0.97265625, "step": 36 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.4695351458214887e-09, "aux_brier/mean_group_std": 0.2084204234569628, "aux_brier/mean_r": 0.5694091250774554, "aux_brier/n_active_tok": 207.25, "aux_brier/n_groups": 12.21875, "aux_brier/n_step_records": 51.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5509564509564511, "calib/avg_num_step_conf": 6.4921875, "calib/ece": 0.568703937007874, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.47244094488188976, "calib/gap": 0.07547399267399257, "calib/mean_conf": 0.8246094488188976, "calib/mu_c": 0.8807692307692307, "calib/mu_w": 0.8052952380952382, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.568703937007874, "calib/std_conf": 0.2026109873481307, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2824.0, "completions/max_terminated_length": 2824.0, "completions/mean_length": 345.5234375, "completions/mean_terminated_length": 345.5234375, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.039466666666666664, "grad_norm": 0.11867935955524445, "learning_rate": 4.527777777777778e-06, "loss": 0.061, "num_tokens": 8347538.0, "reward": 0.8588484525680542, "reward_std": 0.393663227558136, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.46664369106292725, "rewards/format_reward_step": 0.9765625, "step": 37 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.980622919983137e-09, "aux_brier/mean_group_std": 0.18962359566282116, "aux_brier/mean_r": 0.6194763075222263, "aux_brier/n_active_tok": 201.625, "aux_brier/n_groups": 11.0, "aux_brier/n_step_records": 50.40625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4947486600028973, "calib/avg_num_step_conf": 6.40625, "calib/ece": 0.5134901960784314, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.42745098039215684, "calib/gap": 0.017711864406779743, "calib/mean_conf": 0.8127058823529412, "calib/mu_c": 0.825, "calib/mu_w": 0.8072881355932202, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5101568627450981, "calib/std_conf": 0.20665432861187635, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 967.0, "completions/max_terminated_length": 967.0, "completions/mean_length": 310.40234375, "completions/mean_terminated_length": 311.61962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 95.0, "epoch": 0.04053333333333333, "grad_norm": 0.07928521186113358, "learning_rate": 4.5e-06, "loss": 0.0047, "num_tokens": 8533889.0, "reward": 0.9261570572853088, "reward_std": 0.3925643861293793, "rewards/accuracy_reward_step": 0.3046875, "rewards/final_brier_reward_step": 0.4936906099319458, "rewards/format_reward_step": 0.99609375, "step": 38 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.637955598884584e-08, "aux_brier/mean_group_std": 0.19466138151035445, "aux_brier/mean_r": 0.6580826788741304, "aux_brier/n_active_tok": 194.5, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 48.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5455742676989385, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.4896850393700788, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.36220472440944884, "calib/gap": 0.06821454226195223, "calib/mean_conf": 0.7529133858267717, "calib/mu_c": 0.803134328358209, "calib/mu_w": 0.7349197860962567, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.4894094488188977, "calib/std_conf": 0.2602424981672204, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1142.0, "completions/max_terminated_length": 1142.0, "completions/mean_length": 319.20703125, "completions/mean_terminated_length": 320.4588317871094, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.0416, "grad_norm": 0.17791756987571716, "learning_rate": 4.472222222222223e-06, "loss": 0.0411, "num_tokens": 8721694.0, "reward": 0.8819387555122375, "reward_std": 0.3767951428890228, "rewards/accuracy_reward_step": 0.26171875, "rewards/final_brier_reward_step": 0.5199425220489502, "rewards/format_reward_step": 0.98046875, "step": 39 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.2065278907902321e-08, "aux_brier/mean_group_std": 0.1954778320342164, "aux_brier/mean_r": 0.6755043336249337, "aux_brier/n_active_tok": 215.5, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 53.875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4697894033093766, "calib/avg_num_step_conf": 6.734375, "calib/ece": 0.5315294117647059, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2901960784313726, "calib/gap": -0.012303192378405492, "calib/mean_conf": 0.7586666666666667, "calib/mu_c": 0.7493548387096775, "calib/mu_w": 0.7616580310880829, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5235294117647059, "calib/std_conf": 0.2268708422979192, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1854.0, "completions/max_terminated_length": 1854.0, "completions/mean_length": 348.6015625, "completions/mean_terminated_length": 349.9686584472656, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.042666666666666665, "grad_norm": 0.026638193055987358, "learning_rate": 4.444444444444444e-06, "loss": 0.0423, "num_tokens": 8917696.0, "reward": 0.863304078578949, "reward_std": 0.36313459277153015, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.49227890372276306, "rewards/format_reward_step": 0.99609375, "step": 40 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.016353684020089e-08, "aux_brier/mean_group_std": 0.16996555290394572, "aux_brier/mean_r": 0.7222421316455907, "aux_brier/n_active_tok": 187.125, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 46.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5459464651639345, "calib/avg_num_step_conf": 5.88671875, "calib/ece": 0.26088000000000006, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.14, "calib/gap": 0.03965420081967208, "calib/mean_conf": 0.6423199999999999, "calib/mu_c": 0.6626229508196722, "calib/mu_w": 0.6229687500000001, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.20760000000000006, "calib/std_conf": 0.2663790862661707, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 314.6640625, "completions/mean_terminated_length": 314.6640625, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.04373333333333333, "grad_norm": 0.0068152002058923244, "learning_rate": 4.416666666666667e-06, "loss": 0.0417, "num_tokens": 9105498.0, "reward": 1.1296844482421875, "reward_std": 0.4043242037296295, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6593624353408813, "rewards/format_reward_step": 0.9765625, "step": 41 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.121827070797558e-08, "aux_brier/mean_group_std": 0.16372811538673707, "aux_brier/mean_r": 0.7838786712115098, "aux_brier/n_active_tok": 169.75, "aux_brier/n_groups": 8.0625, "aux_brier/n_step_records": 42.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5342356687898089, "calib/avg_num_step_conf": 5.41015625, "calib/ece": 0.30944664031620556, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.1225296442687747, "calib/gap": 0.037894771762208124, "calib/mean_conf": 0.6322134387351778, "calib/mu_c": 0.6557291666666667, "calib/mu_w": 0.6178343949044586, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28110671936758896, "calib/std_conf": 0.2690046098779832, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 854.0, "completions/max_terminated_length": 854.0, "completions/mean_length": 270.6328125, "completions/mean_terminated_length": 271.6941223144531, "completions/min_length": 0.0, "completions/min_terminated_length": 127.0, "epoch": 0.0448, "grad_norm": 0.026243092492222786, "learning_rate": 4.388888888888889e-06, "loss": 0.0172, "num_tokens": 9279148.0, "reward": 1.0233588218688965, "reward_std": 0.38705065846443176, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.632498025894165, "rewards/format_reward_step": 0.98046875, "step": 42 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.337705900169802e-09, "aux_brier/mean_group_std": 0.15379918388874533, "aux_brier/mean_r": 0.7896745983683803, "aux_brier/n_active_tok": 179.0, "aux_brier/n_groups": 9.28125, "aux_brier/n_step_records": 44.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5055303460011344, "calib/avg_num_step_conf": 5.61328125, "calib/ece": 0.32392388451443577, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.12598425196850394, "calib/gap": 0.013710531291359462, "calib/mean_conf": 0.6046587926509186, "calib/mu_c": 0.6139430894308943, "calib/mu_w": 0.6002325581395348, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3028740157480315, "calib/std_conf": 0.26980233510014134, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1046.0, "completions/max_terminated_length": 1046.0, "completions/mean_length": 308.04296875, "completions/mean_terminated_length": 309.2510070800781, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.04586666666666667, "grad_norm": 0.05714472383260727, "learning_rate": 4.361111111111112e-06, "loss": 0.0272, "num_tokens": 9463231.0, "reward": 0.9739567041397095, "reward_std": 0.339687317609787, "rewards/accuracy_reward_step": 0.3203125, "rewards/final_brier_reward_step": 0.6302018165588379, "rewards/format_reward_step": 0.9921875, "step": 43 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.8695633902110487e-08, "aux_brier/mean_group_std": 0.14812661490281, "aux_brier/mean_r": 0.8328716313614364, "aux_brier/n_active_tok": 189.875, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 47.46875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.44824582244396927, "calib/avg_num_step_conf": 5.93359375, "calib/ece": 0.3391666666666666, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0992063492063492, "calib/gap": -0.04897837966262775, "calib/mean_conf": 0.5652777777777778, "calib/mu_c": 0.5297101449275362, "calib/mu_w": 0.578688524590164, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31531746031746033, "calib/std_conf": 0.2576425522778954, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2209.0, "completions/max_terminated_length": 2209.0, "completions/mean_length": 332.609375, "completions/mean_terminated_length": 332.609375, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 0.046933333333333334, "grad_norm": 0.09062433987855911, "learning_rate": 4.333333333333334e-06, "loss": 0.0373, "num_tokens": 9654699.0, "reward": 0.9168442487716675, "reward_std": 0.2913611829280853, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.6205019950866699, "rewards/format_reward_step": 0.984375, "step": 44 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.447890906786547e-08, "aux_brier/mean_group_std": 0.14167737950320608, "aux_brier/mean_r": 0.8465967575337956, "aux_brier/n_active_tok": 190.0, "aux_brier/n_groups": 11.5, "aux_brier/n_step_records": 47.5, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6001746128150623, "calib/avg_num_step_conf": 5.9453125, "calib/ece": 0.20305555555555554, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.09126984126984126, "calib/gap": 0.08909049498937138, "calib/mean_conf": 0.4623412698412699, "calib/mu_c": 0.5252702702702703, "calib/mu_w": 0.4361797752808989, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18587301587301586, "calib/std_conf": 0.2841020290317808, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2491.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 336.0703125, "completions/mean_terminated_length": 336.0703125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.048, "grad_norm": 0.008077290840446949, "learning_rate": 4.305555555555556e-06, "loss": 0.0107, "num_tokens": 9845781.0, "reward": 0.9471868276596069, "reward_std": 0.3378447890281677, "rewards/accuracy_reward_step": 0.2890625, "rewards/final_brier_reward_step": 0.6949973106384277, "rewards/format_reward_step": 0.96875, "step": 45 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.092643408175171e-08, "aux_brier/mean_group_std": 0.10506120241370358, "aux_brier/mean_r": 0.8793923702189638, "aux_brier/n_active_tok": 180.0, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 45.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5154048405582148, "calib/avg_num_step_conf": 5.6328125, "calib/ece": 0.22736220472440943, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.047244094488188976, "calib/gap": 0.01055147306681059, "calib/mean_conf": 0.39586614173228346, "calib/mu_c": 0.4026373626373627, "calib/mu_w": 0.3920858895705521, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1324803149606299, "calib/std_conf": 0.2679049620595658, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1749.0, "completions/max_terminated_length": 1749.0, "completions/mean_length": 304.203125, "completions/mean_terminated_length": 305.3960876464844, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.04906666666666667, "grad_norm": 0.016041941940784454, "learning_rate": 4.277777777777778e-06, "loss": 0.0286, "num_tokens": 10028425.0, "reward": 1.0236575603485107, "reward_std": 0.27823662757873535, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6961929798126221, "rewards/format_reward_step": 0.98828125, "step": 46 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.749983195367677e-07, "aux_brier/mean_group_std": 0.10558489392590886, "aux_brier/mean_r": 0.8945679720612574, "aux_brier/n_active_tok": 166.375, "aux_brier/n_groups": 8.75, "aux_brier/n_step_records": 41.59375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48996539792387545, "calib/avg_num_step_conf": 5.21875, "calib/ece": 0.22125490196078432, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0196078431372549, "calib/gap": -0.008882352941176452, "calib/mean_conf": 0.3655686274509804, "calib/mu_c": 0.35964705882352943, "calib/mu_w": 0.3685294117647059, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12674509803921566, "calib/std_conf": 0.2530588751849672, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 727.0, "completions/max_terminated_length": 727.0, "completions/mean_length": 293.73046875, "completions/mean_terminated_length": 294.88238525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 68.0, "epoch": 0.050133333333333335, "grad_norm": 0.023996079340577126, "learning_rate": 4.25e-06, "loss": 0.0169, "num_tokens": 10209596.0, "reward": 0.9986698627471924, "reward_std": 0.31220507621765137, "rewards/accuracy_reward_step": 0.33203125, "rewards/final_brier_reward_step": 0.69780433177948, "rewards/format_reward_step": 0.984375, "step": 47 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1937660163163777e-07, "aux_brier/mean_group_std": 0.09591855951449497, "aux_brier/mean_r": 0.9127715395966829, "aux_brier/n_active_tok": 152.5, "aux_brier/n_groups": 8.28125, "aux_brier/n_step_records": 38.125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4470385106223257, "calib/avg_num_step_conf": 4.765625, "calib/ece": 0.21876000000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.04307709631409051, "calib/mean_conf": 0.29916, "calib/mu_c": 0.26935064935064934, "calib/mu_w": 0.31242774566473985, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.10496000000000001, "calib/std_conf": 0.20938312826013464, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2480.0, "completions/max_terminated_length": 2480.0, "completions/mean_length": 280.02734375, "completions/mean_terminated_length": 281.1255187988281, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.0512, "grad_norm": 0.03649129346013069, "learning_rate": 4.222222222222223e-06, "loss": 0.1123, "num_tokens": 10384971.0, "reward": 0.9659624099731445, "reward_std": 0.27263981103897095, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.7075996398925781, "rewards/format_reward_step": 0.9765625, "step": 48 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.2386917302986689e-07, "aux_brier/mean_group_std": 0.08437811842763174, "aux_brier/mean_r": 0.9232820856044794, "aux_brier/n_active_tok": 156.125, "aux_brier/n_groups": 8.03125, "aux_brier/n_step_records": 39.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48440043407487793, "calib/avg_num_step_conf": 4.88671875, "calib/ece": 0.24216867469879522, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.01606425702811245, "calib/gap": -0.01642973412913734, "calib/mean_conf": 0.29044176706827307, "calib/mu_c": 0.28041237113402057, "calib/mu_w": 0.2968421052631579, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07152610441767071, "calib/std_conf": 0.22598691547599692, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2252.0, "completions/max_terminated_length": 2252.0, "completions/mean_length": 306.89453125, "completions/mean_terminated_length": 308.0980529785156, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.05226666666666667, "grad_norm": 0.01977912150323391, "learning_rate": 4.194444444444445e-06, "loss": 0.0433, "num_tokens": 10568072.0, "reward": 1.037772297859192, "reward_std": 0.34205636382102966, "rewards/accuracy_reward_step": 0.3828125, "rewards/final_brier_reward_step": 0.674526572227478, "rewards/format_reward_step": 0.97265625, "step": 49 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.162849181783447e-07, "aux_brier/mean_group_std": 0.06460101808658111, "aux_brier/mean_r": 0.9396358134271815, "aux_brier/n_active_tok": 164.375, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 41.09375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47225407747196746, "calib/avg_num_step_conf": 5.13671875, "calib/ece": 0.27916996047430825, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011857707509881422, "calib/gap": -0.025708460754332307, "calib/mean_conf": 0.2411462450592885, "calib/mu_c": 0.22651376146788993, "calib/mu_w": 0.25222222222222224, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04474308300395257, "calib/std_conf": 0.19932264337447902, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2143.0, "completions/max_terminated_length": 2143.0, "completions/mean_length": 309.5390625, "completions/mean_terminated_length": 309.5390625, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "epoch": 0.05333333333333334, "grad_norm": 0.09591098874807358, "learning_rate": 4.166666666666667e-06, "loss": 0.0166, "num_tokens": 10752674.0, "reward": 1.085662603378296, "reward_std": 0.3365734815597534, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6551504135131836, "rewards/format_reward_step": 0.984375, "step": 50 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.15844116724573e-07, "aux_brier/mean_group_std": 0.06538003243288988, "aux_brier/mean_r": 0.9481303794126691, "aux_brier/n_active_tok": 155.75, "aux_brier/n_groups": 8.5, "aux_brier/n_step_records": 38.9375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4995325854700855, "calib/avg_num_step_conf": 4.87109375, "calib/ece": 0.23055555555555557, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.0034455128205128638, "calib/mean_conf": 0.21349206349206354, "calib/mu_c": 0.21562500000000004, "calib/mu_w": 0.21217948717948718, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03154761904761905, "calib/std_conf": 0.1917683682984996, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 707.0, "completions/max_terminated_length": 707.0, "completions/mean_length": 270.48828125, "completions/mean_terminated_length": 271.5490417480469, "completions/min_length": 0.0, "completions/min_terminated_length": 26.0, "epoch": 0.0544, "grad_norm": 0.024282297119498253, "learning_rate": 4.138888888888889e-06, "loss": -0.0216, "num_tokens": 10931215.0, "reward": 1.0321258306503296, "reward_std": 0.3157980442047119, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.683190643787384, "rewards/format_reward_step": 0.97265625, "step": 51 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.602986376500695e-07, "aux_brier/mean_group_std": 0.07154886775889922, "aux_brier/mean_r": 0.9448803675875347, "aux_brier/n_active_tok": 134.875, "aux_brier/n_groups": 7.34375, "aux_brier/n_step_records": 33.71875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6059523809523809, "calib/avg_num_step_conf": 4.25390625, "calib/ece": 0.36125000000000007, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00390625, "calib/gap": 0.047382173382173404, "calib/mean_conf": 0.159140625, "calib/mu_c": 0.18246153846153848, "calib/mu_w": 0.13507936507936508, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006289062500000001, "calib/std_conf": 0.15922432752129737, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 762.0, "completions/max_terminated_length": 762.0, "completions/mean_length": 254.734375, "completions/mean_terminated_length": 255.7333526611328, "completions/min_length": 0.0, "completions/min_terminated_length": 53.0, "epoch": 0.055466666666666664, "grad_norm": 0.022697865962982178, "learning_rate": 4.111111111111111e-06, "loss": 0.0035, "num_tokens": 11104379.0, "reward": 1.1615979671478271, "reward_std": 0.302357017993927, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6229547262191772, "rewards/format_reward_step": 0.99609375, "step": 52 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.3797929529490993e-07, "aux_brier/mean_group_std": 0.07033247463123199, "aux_brier/mean_r": 0.9417557375860584, "aux_brier/n_active_tok": 153.375, "aux_brier/n_groups": 9.0625, "aux_brier/n_step_records": 38.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5232601880877743, "calib/avg_num_step_conf": 4.8125, "calib/ece": 0.30119215686274514, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.009698746081504728, "calib/mean_conf": 0.15049411764705883, "calib/mu_c": 0.15600909090909093, "calib/mu_w": 0.1463103448275862, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010156862745098038, "calib/std_conf": 0.1449343832759984, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2097.0, "completions/max_terminated_length": 2097.0, "completions/mean_length": 287.984375, "completions/mean_terminated_length": 287.984375, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.05653333333333333, "grad_norm": 0.018405409529805183, "learning_rate": 4.083333333333334e-06, "loss": 0.0012, "num_tokens": 11283927.0, "reward": 1.0890626907348633, "reward_std": 0.3066707253456116, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6531254053115845, "rewards/format_reward_step": 0.9921875, "step": 53 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.1869579625597453e-07, "aux_brier/mean_group_std": 0.05911314516603196, "aux_brier/mean_r": 0.9554262852411152, "aux_brier/n_active_tok": 147.625, "aux_brier/n_groups": 9.25, "aux_brier/n_step_records": 36.90625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5177187153931341, "calib/avg_num_step_conf": 4.61328125, "calib/ece": 0.40568627450980393, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": 0.01086194167589516, "calib/mean_conf": 0.12176470588235293, "calib/mu_c": 0.12713178294573643, "calib/mu_w": 0.11626984126984127, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010784313725490196, "calib/std_conf": 0.151917236011792, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 866.0, "completions/max_terminated_length": 866.0, "completions/mean_length": 256.72265625, "completions/mean_terminated_length": 257.72943115234375, "completions/min_length": 0.0, "completions/min_terminated_length": 51.0, "epoch": 0.0576, "grad_norm": 0.07563520967960358, "learning_rate": 4.055555555555556e-06, "loss": -0.0087, "num_tokens": 11455880.0, "reward": 1.1475919485092163, "reward_std": 0.2706340551376343, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5825550556182861, "rewards/format_reward_step": 0.99609375, "step": 54 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.140674072424579e-07, "aux_brier/mean_group_std": 0.07076819763684108, "aux_brier/mean_r": 0.9566779403252893, "aux_brier/n_active_tok": 136.5, "aux_brier/n_groups": 7.65625, "aux_brier/n_step_records": 34.125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5379844961240311, "calib/avg_num_step_conf": 4.27734375, "calib/ece": 0.26633466135458167, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.00952572233967583, "calib/mean_conf": 0.08920318725099602, "calib/mu_c": 0.09546511627906978, "calib/mu_w": 0.08593939393939395, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006454183266932271, "calib/std_conf": 0.11074972030488545, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1590.0, "completions/max_terminated_length": 1590.0, "completions/mean_length": 265.02734375, "completions/mean_terminated_length": 266.0666809082031, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.058666666666666666, "grad_norm": 0.02490275539457798, "learning_rate": 4.027777777777779e-06, "loss": 0.0227, "num_tokens": 11631551.0, "reward": 0.9925234913825989, "reward_std": 0.24763819575309753, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.6810316443443298, "rewards/format_reward_step": 0.97265625, "step": 55 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.3198031052238406e-07, "aux_brier/mean_group_std": 0.06830002255107125, "aux_brier/mean_r": 0.9538539048748047, "aux_brier/n_active_tok": 145.75, "aux_brier/n_groups": 8.03125, "aux_brier/n_step_records": 36.4375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5757305890889901, "calib/avg_num_step_conf": 4.609375, "calib/ece": 0.2807030468750001, "calib/final_conf_rate": 1.0, "calib/format_rate": 1.0, "calib/frac_conf_gt_0.9": 0.0078125, "calib/gap": 0.031841365525430446, "calib/mean_conf": 0.09015632812500002, "calib/mu_c": 0.11043032258064517, "calib/mu_w": 0.07858895705521472, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0037890625000000008, "calib/std_conf": 0.12237860924434397, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 905.0, "completions/max_terminated_length": 905.0, "completions/mean_length": 281.45703125, "completions/mean_terminated_length": 282.560791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 69.0, "epoch": 0.05973333333333333, "grad_norm": 0.06439587473869324, "learning_rate": 4.000000000000001e-06, "loss": -0.0617, "num_tokens": 11810444.0, "reward": 1.036743402481079, "reward_std": 0.24663712084293365, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6938486099243164, "rewards/format_reward_step": 1.0, "step": 56 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.8740293614705195e-06, "aux_brier/mean_group_std": 0.0428314464021801, "aux_brier/mean_r": 0.9725623138676681, "aux_brier/n_active_tok": 145.5, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 36.375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5411844373307068, "calib/avg_num_step_conf": 4.58203125, "calib/ece": 0.40035294117647063, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.021226914553065745, "calib/mean_conf": 0.09062745098039215, "calib/mu_c": 0.10153225806451613, "calib/mu_w": 0.08030534351145038, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002352941176470588, "calib/std_conf": 0.11196462340178034, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 824.0, "completions/max_terminated_length": 824.0, "completions/mean_length": 278.06640625, "completions/mean_terminated_length": 279.1568908691406, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.0608, "grad_norm": 0.07354693859815598, "learning_rate": 3.972222222222223e-06, "loss": -0.0216, "num_tokens": 11988421.0, "reward": 1.1314563751220703, "reward_std": 0.29179269075393677, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.5883257389068604, "rewards/format_reward_step": 0.9921875, "step": 57 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.8155679222253696e-06, "aux_brier/mean_group_std": 0.05708504997291442, "aux_brier/mean_r": 0.9610682461370297, "aux_brier/n_active_tok": 156.125, "aux_brier/n_groups": 10.46875, "aux_brier/n_step_records": 39.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49875278096136993, "calib/avg_num_step_conf": 4.9296875, "calib/ece": 0.32303543228346465, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.021296771940942504, "calib/mean_conf": 0.07263385905511811, "calib/mu_c": 0.05896703296703296, "calib/mu_w": 0.08026380490797547, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.018700787401574805, "calib/std_conf": 0.12977392455782227, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2442.0, "completions/max_terminated_length": 2442.0, "completions/mean_length": 330.9765625, "completions/mean_terminated_length": 330.9765625, "completions/min_length": 67.0, "completions/min_terminated_length": 67.0, "epoch": 0.06186666666666667, "grad_norm": 0.0347810834646225, "learning_rate": 3.944444444444445e-06, "loss": 0.0667, "num_tokens": 12179471.0, "reward": 1.0157365798950195, "reward_std": 0.26782283186912537, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6566964387893677, "rewards/format_reward_step": 0.9921875, "step": 58 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.113791698956426e-07, "aux_brier/mean_group_std": 0.05058431874779699, "aux_brier/mean_r": 0.964331699346551, "aux_brier/n_active_tok": 154.625, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 38.65625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5291902071563088, "calib/avg_num_step_conf": 4.83203125, "calib/ece": 0.41102766798418977, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.015797865662272453, "calib/mean_conf": 0.07055335968379448, "calib/mu_c": 0.07898305084745762, "calib/mu_w": 0.06318518518518516, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00758893280632411, "calib/std_conf": 0.12663355570078566, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2805.0, "completions/max_terminated_length": 2805.0, "completions/mean_length": 341.86328125, "completions/mean_terminated_length": 341.86328125, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.06293333333333333, "grad_norm": 0.039401791989803314, "learning_rate": 3.916666666666667e-06, "loss": 0.0645, "num_tokens": 12373236.0, "reward": 1.097151756286621, "reward_std": 0.2567059099674225, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5761074423789978, "rewards/format_reward_step": 0.984375, "step": 59 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.0313276794258215e-07, "aux_brier/mean_group_std": 0.05661215187662885, "aux_brier/mean_r": 0.966159721868029, "aux_brier/n_active_tok": 141.875, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 35.46875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4236207113233631, "calib/avg_num_step_conf": 4.51171875, "calib/ece": 0.38878431372549027, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": -0.02630891039336434, "calib/mean_conf": 0.05341176470588236, "calib/mu_c": 0.03834862385321101, "calib/mu_w": 0.06465753424657535, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.007372549019607843, "calib/std_conf": 0.09131998824873205, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1066.0, "completions/max_terminated_length": 1066.0, "completions/mean_length": 305.4140625, "completions/mean_terminated_length": 306.6117858886719, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.064, "grad_norm": 0.013780745677649975, "learning_rate": 3.88888888888889e-06, "loss": 0.0349, "num_tokens": 12560278.0, "reward": 1.0688536167144775, "reward_std": 0.3076348900794983, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.5879144668579102, "rewards/format_reward_step": 0.9921875, "step": 60 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.3394812520560748e-07, "aux_brier/mean_group_std": 0.045694190172811344, "aux_brier/mean_r": 0.9700046471339311, "aux_brier/n_active_tok": 146.5, "aux_brier/n_groups": 9.75, "aux_brier/n_step_records": 36.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5012419274714357, "calib/avg_num_step_conf": 4.5859375, "calib/ece": 0.44771653543307083, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.006265524093392941, "calib/mean_conf": 0.04055118110236221, "calib/mu_c": 0.03729508196721312, "calib/mu_w": 0.043560606060606064, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003976377952755906, "calib/std_conf": 0.06987398203012857, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2000.0, "completions/max_terminated_length": 2000.0, "completions/mean_length": 292.96484375, "completions/mean_terminated_length": 292.96484375, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.06506666666666666, "grad_norm": 0.01826075091958046, "learning_rate": 3.861111111111112e-06, "loss": 0.0699, "num_tokens": 12739341.0, "reward": 1.1030099391937256, "reward_std": 0.24034938216209412, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.5370398759841919, "rewards/format_reward_step": 0.984375, "step": 61 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.24482866865106e-07, "aux_brier/mean_group_std": 0.05190789109701259, "aux_brier/mean_r": 0.9692692061809874, "aux_brier/n_active_tok": 155.75, "aux_brier/n_groups": 9.90625, "aux_brier/n_step_records": 38.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5052802073210236, "calib/avg_num_step_conf": 4.8828125, "calib/ece": 0.38292063492063494, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": -0.0019210884353741478, "calib/mean_conf": 0.04715873015873016, "calib/mu_c": 0.046038095238095246, "calib/mu_w": 0.047959183673469394, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006706349206349207, "calib/std_conf": 0.10700359085343987, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2771.0, "completions/max_terminated_length": 2771.0, "completions/mean_length": 352.11328125, "completions/mean_terminated_length": 353.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.06613333333333334, "grad_norm": 0.02990497089922428, "learning_rate": 3.833333333333334e-06, "loss": 0.0459, "num_tokens": 12936562.0, "reward": 1.0500023365020752, "reward_std": 0.3055551052093506, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.5984466075897217, "rewards/format_reward_step": 0.98046875, "step": 62 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.753689738927868e-06, "aux_brier/mean_group_std": 0.044572951761300696, "aux_brier/mean_r": 0.9684097674822255, "aux_brier/n_active_tok": 160.5, "aux_brier/n_groups": 9.40625, "aux_brier/n_step_records": 40.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5533404832502682, "calib/avg_num_step_conf": 5.03125, "calib/ece": 0.43831349206349207, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01846918175509432, "calib/mean_conf": 0.0418452380952381, "calib/mu_c": 0.051446280991735545, "calib/mu_w": 0.03297709923664122, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.07575197006935037, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2141.0, "completions/max_terminated_length": 2141.0, "completions/mean_length": 340.15234375, "completions/mean_terminated_length": 340.15234375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.0672, "grad_norm": 0.010200646705925465, "learning_rate": 3.8055555555555556e-06, "loss": 0.0208, "num_tokens": 13132281.0, "reward": 1.1030884981155396, "reward_std": 0.2642902135848999, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.5529792308807373, "rewards/format_reward_step": 0.984375, "step": 63 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.894694327680968e-06, "aux_brier/mean_group_std": 0.046327561448707186, "aux_brier/mean_r": 0.9648705577008491, "aux_brier/n_active_tok": 148.25, "aux_brier/n_groups": 9.0625, "aux_brier/n_step_records": 37.0625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.545457423235201, "calib/avg_num_step_conf": 4.66015625, "calib/ece": 0.43654444444444446, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0054552706552706415, "calib/mean_conf": 0.03583650793650794, "calib/mu_c": 0.03875897435897435, "calib/mu_w": 0.03330370370370371, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004047619047619047, "calib/std_conf": 0.05919183516085858, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2073.0, "completions/max_terminated_length": 2073.0, "completions/mean_length": 332.88671875, "completions/mean_terminated_length": 334.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 67.0, "epoch": 0.06826666666666667, "grad_norm": 0.009221460670232773, "learning_rate": 3.777777777777778e-06, "loss": 0.0649, "num_tokens": 13321276.0, "reward": 1.0858047008514404, "reward_std": 0.2983360290527344, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5541560053825378, "rewards/format_reward_step": 0.98046875, "step": 64 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.005934009069904e-07, "aux_brier/mean_group_std": 0.043607361752269515, "aux_brier/mean_r": 0.9734475673895174, "aux_brier/n_active_tok": 137.125, "aux_brier/n_groups": 7.75, "aux_brier/n_step_records": 34.28125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5181659823317157, "calib/avg_num_step_conf": 4.296875, "calib/ece": 0.4151764705882353, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008969764837625984, "calib/mean_conf": 0.03188235294117647, "calib/mu_c": 0.0368421052631579, "calib/mu_w": 0.027872340425531914, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.054560506818280105, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2387.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 291.8515625, "completions/mean_terminated_length": 291.8515625, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.06933333333333333, "grad_norm": 0.0236252062022686, "learning_rate": 3.7500000000000005e-06, "loss": 0.0106, "num_tokens": 13501014.0, "reward": 1.0858122110366821, "reward_std": 0.17494623363018036, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.5776238441467285, "rewards/format_reward_step": 0.9921875, "step": 65 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.475078657170897e-07, "aux_brier/mean_group_std": 0.034501214740826486, "aux_brier/mean_r": 0.978312505592935, "aux_brier/n_active_tok": 166.125, "aux_brier/n_groups": 10.875, "aux_brier/n_step_records": 41.53125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5534610215053763, "calib/avg_num_step_conf": 5.19921875, "calib/ece": 0.34677290836653385, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014120295698924729, "calib/mean_conf": 0.038884462151394426, "calib/mu_c": 0.04760416666666667, "calib/mu_w": 0.03348387096774194, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0015936254980079682, "calib/std_conf": 0.08098220869492957, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2287.0, "completions/max_terminated_length": 2287.0, "completions/mean_length": 381.421875, "completions/mean_terminated_length": 381.421875, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.0704, "grad_norm": 0.007549663074314594, "learning_rate": 3.7222222222222225e-06, "loss": 0.013, "num_tokens": 13705010.0, "reward": 1.023549199104309, "reward_std": 0.2723032832145691, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6332593560218811, "rewards/format_reward_step": 0.98046875, "step": 66 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.862533782711846e-07, "aux_brier/mean_group_std": 0.06913670058011565, "aux_brier/mean_r": 0.9578773176077037, "aux_brier/n_active_tok": 158.5, "aux_brier/n_groups": 8.96875, "aux_brier/n_step_records": 39.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4670850252245601, "calib/avg_num_step_conf": 4.98046875, "calib/ece": 0.46768627450980393, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010070136581764491, "calib/mean_conf": 0.03874509803921569, "calib/mu_c": 0.043720930232558144, "calib/mu_w": 0.03365079365079365, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00027450980392156797, "calib/std_conf": 0.08100831309005435, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1314.0, "completions/max_terminated_length": 1314.0, "completions/mean_length": 351.9921875, "completions/mean_terminated_length": 353.37255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.07146666666666666, "grad_norm": 0.03831132501363754, "learning_rate": 3.694444444444445e-06, "loss": -0.0231, "num_tokens": 13900128.0, "reward": 1.1310813426971436, "reward_std": 0.2168925702571869, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5243257880210876, "rewards/format_reward_step": 0.9921875, "step": 67 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.904301620449857e-06, "aux_brier/mean_group_std": 0.03950391046888913, "aux_brier/mean_r": 0.9778407633613662, "aux_brier/n_active_tok": 145.875, "aux_brier/n_groups": 8.40625, "aux_brier/n_step_records": 36.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.4421033119130005, "calib/avg_num_step_conf": 4.56640625, "calib/ece": 0.4372549019607843, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.011134453781512607, "calib/mean_conf": 0.03333333333333333, "calib/mu_c": 0.027394957983193274, "calib/mu_w": 0.03852941176470588, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.00196078431372549, "calib/std_conf": 0.0495496054342798, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1891.0, "completions/max_terminated_length": 1891.0, "completions/mean_length": 322.6328125, "completions/mean_terminated_length": 322.6328125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.07253333333333334, "grad_norm": 0.02294660173356533, "learning_rate": 3.6666666666666666e-06, "loss": 0.0139, "num_tokens": 14086810.0, "reward": 1.0953232049942017, "reward_std": 0.2644977569580078, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.5453554391860962, "rewards/format_reward_step": 0.98828125, "step": 68 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.828993963754911e-06, "aux_brier/mean_group_std": 0.054415173657488144, "aux_brier/mean_r": 0.9656869927496436, "aux_brier/n_active_tok": 138.75, "aux_brier/n_groups": 8.4375, "aux_brier/n_step_records": 34.6875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5061778135911899, "calib/avg_num_step_conf": 4.44140625, "calib/ece": 0.38887096774193547, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008326618318560304, "calib/mean_conf": 0.034999999999999996, "calib/mu_c": 0.030098039215686274, "calib/mu_w": 0.03842465753424658, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.006290322580645161, "calib/std_conf": 0.06315623229277814, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2488.0, "completions/max_terminated_length": 2488.0, "completions/mean_length": 388.2890625, "completions/mean_terminated_length": 391.3464660644531, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.0736, "grad_norm": 0.021312355995178223, "learning_rate": 3.638888888888889e-06, "loss": 0.0807, "num_tokens": 14290708.0, "reward": 1.0279853343963623, "reward_std": 0.2581242322921753, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5885039567947388, "rewards/format_reward_step": 0.96484375, "step": 69 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.4806776673935715e-06, "aux_brier/mean_group_std": 0.047341455937476046, "aux_brier/mean_r": 0.9691388794667096, "aux_brier/n_active_tok": 164.375, "aux_brier/n_groups": 10.875, "aux_brier/n_step_records": 41.09375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5741336803177647, "calib/avg_num_step_conf": 5.15625, "calib/ece": 0.3693927125506073, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.008097165991902834, "calib/gap": -0.0003992603752910606, "calib/mean_conf": 0.04299595141700405, "calib/mu_c": 0.04275510204081632, "calib/mu_w": 0.04315436241610738, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.007813765182186235, "calib/std_conf": 0.09620286838989253, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2572.0, "completions/max_terminated_length": 2572.0, "completions/mean_length": 410.12109375, "completions/mean_terminated_length": 410.12109375, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.07466666666666667, "grad_norm": 0.0068456740118563175, "learning_rate": 3.6111111111111115e-06, "loss": 0.1273, "num_tokens": 14502691.0, "reward": 1.0172240734100342, "reward_std": 0.28901076316833496, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.6001461148262024, "rewards/format_reward_step": 0.9609375, "step": 70 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.7551963700255264e-06, "aux_brier/mean_group_std": 0.05001241877581945, "aux_brier/mean_r": 0.9647166802332054, "aux_brier/n_active_tok": 170.375, "aux_brier/n_groups": 10.875, "aux_brier/n_step_records": 42.59375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4841410559495666, "calib/avg_num_step_conf": 5.3828125, "calib/ece": 0.39889156626506017, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.00132466509062254, "calib/mean_conf": 0.040546184738955826, "calib/mu_c": 0.041296296296296296, "calib/mu_w": 0.039971631205673756, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0028514056224899598, "calib/std_conf": 0.08503676783035762, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2671.0, "completions/max_terminated_length": 2671.0, "completions/mean_length": 403.734375, "completions/mean_terminated_length": 406.91339111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.07573333333333333, "grad_norm": 0.02146107144653797, "learning_rate": 3.5833333333333335e-06, "loss": 0.042, "num_tokens": 14710455.0, "reward": 1.0534417629241943, "reward_std": 0.26898854970932007, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.573142409324646, "rewards/format_reward_step": 0.96875, "step": 71 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.2279897723202e-08, "aux_brier/mean_group_std": 0.0359983421658229, "aux_brier/mean_r": 0.9738087378558621, "aux_brier/n_active_tok": 168.0, "aux_brier/n_groups": 10.53125, "aux_brier/n_step_records": 42.0, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5869316738633477, "calib/avg_num_step_conf": 5.28515625, "calib/ece": 0.4576812749003984, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007145161290322596, "calib/mean_conf": 0.03952988047808765, "calib/mu_c": 0.04314516129032259, "calib/mu_w": 0.03599999999999999, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.0015936254980079682, "calib/std_conf": 0.05711624150285109, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2524.0, "completions/max_terminated_length": 2524.0, "completions/mean_length": 390.29296875, "completions/mean_terminated_length": 393.36614990234375, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.0768, "grad_norm": 0.010083409957587719, "learning_rate": 3.555555555555556e-06, "loss": 0.0375, "num_tokens": 14914778.0, "reward": 1.1078994274139404, "reward_std": 0.2597416639328003, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5331599712371826, "rewards/format_reward_step": 0.98046875, "step": 72 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.682308865690878e-07, "aux_brier/mean_group_std": 0.0585894279021773, "aux_brier/mean_r": 0.9647357698864109, "aux_brier/n_active_tok": 143.375, "aux_brier/n_groups": 7.625, "aux_brier/n_step_records": 35.84375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5171365914786967, "calib/avg_num_step_conf": 4.57421875, "calib/ece": 0.4904189723320158, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": -0.004930952380952376, "calib/mean_conf": 0.04262450592885375, "calib/mu_c": 0.04028571428571429, "calib/mu_w": 0.04521666666666667, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.003675889328063241, "calib/std_conf": 0.07666423193868173, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1189.0, "completions/max_terminated_length": 1189.0, "completions/mean_length": 348.09375, "completions/mean_terminated_length": 349.4588317871094, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.07786666666666667, "grad_norm": 0.01929972879588604, "learning_rate": 3.5277777777777784e-06, "loss": 0.0048, "num_tokens": 15110922.0, "reward": 1.1335673332214355, "reward_std": 0.31123343110084534, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.4952068626880646, "rewards/format_reward_step": 0.98046875, "step": 73 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.198678284161872e-06, "aux_brier/mean_group_std": 0.0269770449703097, "aux_brier/mean_r": 0.9854415484358731, "aux_brier/n_active_tok": 144.0, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 36.0, "calib/answer_extract_rate": 0.94921875, "calib/auroc": 0.5560075093867335, "calib/avg_num_step_conf": 4.63671875, "calib/ece": 0.3832921810699588, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013364622444722564, "calib/mean_conf": 0.03646090534979424, "calib/mu_c": 0.044215686274509794, "calib/mu_w": 0.03085106382978723, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0, "calib/std_conf": 0.0647234184788772, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2109.0, "completions/max_terminated_length": 2109.0, "completions/mean_length": 364.51171875, "completions/mean_terminated_length": 373.260009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.07893333333333333, "grad_norm": 0.035346951335668564, "learning_rate": 3.5e-06, "loss": -0.0067, "num_tokens": 15308165.0, "reward": 1.015311598777771, "reward_std": 0.283864289522171, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.5768714547157288, "rewards/format_reward_step": 0.9453125, "step": 74 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.212699522611942e-07, "aux_brier/mean_group_std": 0.0841872819271489, "aux_brier/mean_r": 0.9498334509439114, "aux_brier/n_active_tok": 148.625, "aux_brier/n_groups": 7.875, "aux_brier/n_step_records": 37.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5091479700854702, "calib/avg_num_step_conf": 4.66015625, "calib/ece": 0.5732936507936508, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.0011858974358974483, "calib/mean_conf": 0.04781746031746032, "calib/mu_c": 0.04826923076923078, "calib/mu_w": 0.04708333333333333, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.0010317460317460316, "calib/std_conf": 0.09127621613347724, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2380.0, "completions/max_terminated_length": 2380.0, "completions/mean_length": 354.58984375, "completions/mean_terminated_length": 354.58984375, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 0.08, "grad_norm": 0.030502239242196083, "learning_rate": 3.4722222222222224e-06, "loss": 0.0479, "num_tokens": 15503692.0, "reward": 1.1990516185760498, "reward_std": 0.24975721538066864, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.41339412331581116, "rewards/format_reward_step": 0.97265625, "step": 75 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.7227163162658599e-06, "aux_brier/mean_group_std": 0.06900281346666806, "aux_brier/mean_r": 0.9515599259935525, "aux_brier/n_active_tok": 147.5, "aux_brier/n_groups": 8.28125, "aux_brier/n_step_records": 36.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5321428571428571, "calib/avg_num_step_conf": 4.609375, "calib/ece": 0.5207936507936509, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0004821428571428546, "calib/mean_conf": 0.04285714285714286, "calib/mu_c": 0.04264285714285715, "calib/mu_w": 0.043125000000000004, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004047619047619047, "calib/std_conf": 0.059326339112334044, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3031.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 392.92578125, "completions/mean_terminated_length": 394.4666748046875, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.08106666666666666, "grad_norm": 0.12247002124786377, "learning_rate": 3.444444444444445e-06, "loss": 0.0492, "num_tokens": 15707337.0, "reward": 1.151153326034546, "reward_std": 0.28736162185668945, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.47180116176605225, "rewards/format_reward_step": 0.97265625, "step": 76 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.2460159034287663e-06, "aux_brier/mean_group_std": 0.06692432972577068, "aux_brier/mean_r": 0.9499065733130977, "aux_brier/n_active_tok": 157.75, "aux_brier/n_groups": 9.28125, "aux_brier/n_step_records": 39.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5181086519114688, "calib/avg_num_step_conf": 4.94140625, "calib/ece": 0.5237795275590551, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007273641851106641, "calib/mean_conf": 0.04236220472440945, "calib/mu_c": 0.03915492957746479, "calib/mu_w": 0.04642857142857143, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003543307086614173, "calib/std_conf": 0.06613731471936848, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2165.0, "completions/max_terminated_length": 2165.0, "completions/mean_length": 365.48828125, "completions/mean_terminated_length": 366.9216003417969, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.08213333333333334, "grad_norm": 0.3201024830341339, "learning_rate": 3.416666666666667e-06, "loss": 0.0312, "num_tokens": 15905566.0, "reward": 1.1665558815002441, "reward_std": 0.24066457152366638, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.4709107279777527, "rewards/format_reward_step": 0.98828125, "step": 77 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.7008393490974827e-06, "aux_brier/mean_group_std": 0.053520777021158816, "aux_brier/mean_r": 0.9644958211967355, "aux_brier/n_active_tok": 153.75, "aux_brier/n_groups": 8.3125, "aux_brier/n_step_records": 38.4375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5628333756667514, "calib/avg_num_step_conf": 4.84375, "calib/ece": 0.46417928286852594, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.009735394970789957, "calib/mean_conf": 0.04912749003984065, "calib/mu_c": 0.05393700787401576, "calib/mu_w": 0.0442016129032258, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.0036653386454183266, "calib/std_conf": 0.08170469419233298, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1496.0, "completions/max_terminated_length": 1496.0, "completions/mean_length": 419.484375, "completions/mean_terminated_length": 421.1294250488281, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.0832, "grad_norm": 0.4412800669670105, "learning_rate": 3.3888888888888893e-06, "loss": 0.0037, "num_tokens": 16120978.0, "reward": 1.1127533912658691, "reward_std": 0.2662516236305237, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5213262438774109, "rewards/format_reward_step": 0.97265625, "step": 78 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.3937092612770208e-06, "aux_brier/mean_group_std": 0.055926159610741925, "aux_brier/mean_r": 0.9609619051659082, "aux_brier/n_active_tok": 165.375, "aux_brier/n_groups": 9.875, "aux_brier/n_step_records": 41.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6318265624006612, "calib/avg_num_step_conf": 5.203125, "calib/ece": 0.5318897637795276, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008951618030389713, "calib/mean_conf": 0.05228346456692914, "calib/mu_c": 0.05605442176870747, "calib/mu_w": 0.04710280373831776, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0027165354330708656, "calib/std_conf": 0.06688442230483788, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2014.0, "completions/max_terminated_length": 2014.0, "completions/mean_length": 415.703125, "completions/mean_terminated_length": 417.3333435058594, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.08426666666666667, "grad_norm": 0.056676361709833145, "learning_rate": 3.3611111111111117e-06, "loss": -0.0035, "num_tokens": 16333774.0, "reward": 1.1864250898361206, "reward_std": 0.2254134863615036, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.47226327657699585, "rewards/format_reward_step": 0.98828125, "step": 79 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.430265701458545e-07, "aux_brier/mean_group_std": 0.07378595864884269, "aux_brier/mean_r": 0.9537021174604706, "aux_brier/n_active_tok": 181.25, "aux_brier/n_groups": 9.84375, "aux_brier/n_step_records": 45.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.49804031354983214, "calib/avg_num_step_conf": 5.6875, "calib/ece": 0.49974117647058824, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.001939529675251954, "calib/mean_conf": 0.055945098039215685, "calib/mu_c": 0.055078014184397166, "calib/mu_w": 0.05701754385964912, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.001372549019607843, "calib/std_conf": 0.04814207858903416, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1642.0, "completions/max_terminated_length": 1642.0, "completions/mean_length": 380.67578125, "completions/mean_terminated_length": 380.67578125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.08533333333333333, "grad_norm": 0.011375880800187588, "learning_rate": 3.3333333333333333e-06, "loss": 0.0042, "num_tokens": 16533387.0, "reward": 1.1681087017059326, "reward_std": 0.25508496165275574, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.4927472174167633, "rewards/format_reward_step": 0.98828125, "step": 80 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.5845477869566054e-07, "aux_brier/mean_group_std": 0.0507530265053984, "aux_brier/mean_r": 0.9653950254716033, "aux_brier/n_active_tok": 163.875, "aux_brier/n_groups": 9.21875, "aux_brier/n_step_records": 40.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5447194719471947, "calib/avg_num_step_conf": 5.16796875, "calib/ece": 0.5508764940239044, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": 0.004193399339933991, "calib/mean_conf": 0.0551792828685259, "calib/mu_c": 0.056866666666666676, "calib/mu_w": 0.052673267326732685, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.004223107569721116, "calib/std_conf": 0.08545791658503876, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2582.0, "completions/max_terminated_length": 2582.0, "completions/mean_length": 429.3828125, "completions/mean_terminated_length": 434.4743347167969, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.0864, "grad_norm": 0.017039675265550613, "learning_rate": 3.3055555555555558e-06, "loss": 0.0058, "num_tokens": 16749557.0, "reward": 1.1777634620666504, "reward_std": 0.2547895014286041, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.437616765499115, "rewards/format_reward_step": 0.96484375, "step": 81 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.5483395826308985e-06, "aux_brier/mean_group_std": 0.05619580028388027, "aux_brier/mean_r": 0.9667790617048112, "aux_brier/n_active_tok": 170.375, "aux_brier/n_groups": 9.96875, "aux_brier/n_step_records": 42.59375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5851655629139073, "calib/avg_num_step_conf": 5.36328125, "calib/ece": 0.5572509960159363, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009368874172185432, "calib/mean_conf": 0.04593625498007969, "calib/mu_c": 0.04966887417218543, "calib/mu_w": 0.040299999999999996, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0007968127490039841, "calib/std_conf": 0.03421603385006815, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2673.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 414.01953125, "completions/mean_terminated_length": 415.6431579589844, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.08746666666666666, "grad_norm": 0.019228482618927956, "learning_rate": 3.277777777777778e-06, "loss": 0.0495, "num_tokens": 16961098.0, "reward": 1.1896255016326904, "reward_std": 0.19961920380592346, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.44600194692611694, "rewards/format_reward_step": 0.9765625, "step": 82 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.9809612028808274e-07, "aux_brier/mean_group_std": 0.06320812036477821, "aux_brier/mean_r": 0.947728329427175, "aux_brier/n_active_tok": 181.375, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 45.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4861111111111111, "calib/avg_num_step_conf": 5.76171875, "calib/ece": 0.45103174603174606, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.010317460317460315, "calib/mean_conf": 0.06174603174603174, "calib/mu_c": 0.05658730158730158, "calib/mu_w": 0.0669047619047619, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006388888888888888, "calib/std_conf": 0.05439536869115432, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2195.0, "completions/max_terminated_length": 2195.0, "completions/mean_length": 485.3984375, "completions/mean_terminated_length": 487.302001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 150.0, "epoch": 0.08853333333333334, "grad_norm": 0.02043166384100914, "learning_rate": 3.2500000000000002e-06, "loss": -0.0036, "num_tokens": 17192624.0, "reward": 1.113842248916626, "reward_std": 0.22586926817893982, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5334937572479248, "rewards/format_reward_step": 0.9765625, "step": 83 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.0873535569166926e-06, "aux_brier/mean_group_std": 0.0742799624119722, "aux_brier/mean_r": 0.9464002895295682, "aux_brier/n_active_tok": 181.75, "aux_brier/n_groups": 10.78125, "aux_brier/n_step_records": 45.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5815245478036176, "calib/avg_num_step_conf": 5.703125, "calib/ece": 0.4635863453815261, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0036800387596899256, "calib/mean_conf": 0.06171485943775101, "calib/mu_c": 0.06348837209302327, "calib/mu_w": 0.059808333333333345, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.003614457831325301, "calib/std_conf": 0.05428530739043686, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2362.0, "completions/max_terminated_length": 2362.0, "completions/mean_length": 427.44921875, "completions/mean_terminated_length": 430.8149719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 109.0, "epoch": 0.0896, "grad_norm": 0.07000966370105743, "learning_rate": 3.2222222222222227e-06, "loss": 0.0348, "num_tokens": 17407971.0, "reward": 1.1227518320083618, "reward_std": 0.2660667896270752, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.5222572088241577, "rewards/format_reward_step": 0.96875, "step": 84 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.4014386558231e-07, "aux_brier/mean_group_std": 0.06883538968056355, "aux_brier/mean_r": 0.9534327318669129, "aux_brier/n_active_tok": 191.375, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 47.84375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.655657611881962, "calib/avg_num_step_conf": 6.04296875, "calib/ece": 0.41879838709677425, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01674275291511953, "calib/mean_conf": 0.06507258064516128, "calib/mu_c": 0.07378151260504201, "calib/mu_w": 0.05703875968992248, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0020161290322580645, "calib/std_conf": 0.04755297103894344, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3044.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 495.390625, "completions/mean_terminated_length": 499.2913513183594, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.09066666666666667, "grad_norm": 0.18353381752967834, "learning_rate": 3.1944444444444443e-06, "loss": 0.0524, "num_tokens": 17642615.0, "reward": 1.0919911861419678, "reward_std": 0.2321237176656723, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.5632776021957397, "rewards/format_reward_step": 0.96484375, "step": 85 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.099286303624103e-06, "aux_brier/mean_group_std": 0.06707426515548004, "aux_brier/mean_r": 0.9573595262489308, "aux_brier/n_active_tok": 191.375, "aux_brier/n_groups": 13.0, "aux_brier/n_step_records": 47.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5389953523906539, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.38638888888888884, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.004916661361176536, "calib/mean_conf": 0.08011904761904762, "calib/mu_c": 0.07740707964601772, "calib/mu_w": 0.08232374100719425, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009047619047619047, "calib/std_conf": 0.08259324765271753, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3054.0, "completions/max_terminated_length": 3054.0, "completions/mean_length": 485.828125, "completions/mean_terminated_length": 485.828125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.09173333333333333, "grad_norm": 0.03711724281311035, "learning_rate": 3.1666666666666667e-06, "loss": 0.0345, "num_tokens": 17872499.0, "reward": 1.0802927017211914, "reward_std": 0.25492456555366516, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.5946087837219238, "rewards/format_reward_step": 0.98046875, "step": 86 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0451743066158814e-06, "aux_brier/mean_group_std": 0.07929236365768332, "aux_brier/mean_r": 0.9472709314494246, "aux_brier/n_active_tok": 165.75, "aux_brier/n_groups": 9.28125, "aux_brier/n_step_records": 41.4375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.42985277301315034, "calib/avg_num_step_conf": 5.42578125, "calib/ece": 0.5742874493927125, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01776672384219556, "calib/mean_conf": 0.08174493927125508, "calib/mu_c": 0.07541509433962264, "calib/mu_w": 0.0931818181818182, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006153846153846154, "calib/std_conf": 0.06612798690816678, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 442.55859375, "completions/mean_terminated_length": 446.0433044433594, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.0928, "grad_norm": 0.04962961748242378, "learning_rate": 3.138888888888889e-06, "loss": 0.0671, "num_tokens": 18091290.0, "reward": 1.2120829820632935, "reward_std": 0.31140634417533875, "rewards/accuracy_reward_step": 0.625, "rewards/final_brier_reward_step": 0.4264569580554962, "rewards/format_reward_step": 0.9609375, "step": 87 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0509063224084514e-06, "aux_brier/mean_group_std": 0.06235820368722907, "aux_brier/mean_r": 0.9602920220944907, "aux_brier/n_active_tok": 186.0, "aux_brier/n_groups": 9.8125, "aux_brier/n_step_records": 46.5, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6195488721804512, "calib/avg_num_step_conf": 5.91015625, "calib/ece": 0.4569737903225806, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01963409610983982, "calib/mean_conf": 0.07931653225806454, "calib/mu_c": 0.08842105263157896, "calib/mu_w": 0.06878695652173913, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.04824017542538108, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 492.8515625, "completions/mean_terminated_length": 498.6956787109375, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.09386666666666667, "grad_norm": 0.02846703678369522, "learning_rate": 3.1111111111111116e-06, "loss": 0.0405, "num_tokens": 18327308.0, "reward": 1.1370923519134521, "reward_std": 0.26496851444244385, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5327448844909668, "rewards/format_reward_step": 0.96875, "step": 88 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.189935513629564e-08, "aux_brier/mean_group_std": 0.07248998753124442, "aux_brier/mean_r": 0.9536642206232372, "aux_brier/n_active_tok": 193.875, "aux_brier/n_groups": 11.9375, "aux_brier/n_step_records": 48.46875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5540099268547545, "calib/avg_num_step_conf": 6.11328125, "calib/ece": 0.3885556451612903, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005280094043887171, "calib/mean_conf": 0.08813790322580646, "calib/mu_c": 0.09094827586206898, "calib/mu_w": 0.08566818181818181, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004475806451612903, "calib/std_conf": 0.06480840633040909, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2568.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 518.26953125, "completions/mean_terminated_length": 520.302001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 98.0, "epoch": 0.09493333333333333, "grad_norm": 0.06254668533802032, "learning_rate": 3.0833333333333336e-06, "loss": 0.1058, "num_tokens": 18568873.0, "reward": 1.0756702423095703, "reward_std": 0.25718456506729126, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.5761181116104126, "rewards/format_reward_step": 0.95703125, "step": 89 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1078112610585222e-07, "aux_brier/mean_group_std": 0.08830140773048345, "aux_brier/mean_r": 0.9366528511074824, "aux_brier/n_active_tok": 200.875, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 50.21875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5940578428896186, "calib/avg_num_step_conf": 6.3203125, "calib/ece": 0.48576470588235304, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.014897069967163432, "calib/mean_conf": 0.09462745098039217, "calib/mu_c": 0.10087837837837837, "calib/mu_w": 0.08598130841121494, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.0498680457962984, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1480.0, "completions/max_terminated_length": 1480.0, "completions/mean_length": 458.7109375, "completions/mean_terminated_length": 460.50982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.096, "grad_norm": 0.010644305497407913, "learning_rate": 3.055555555555556e-06, "loss": 0.0114, "num_tokens": 18789623.0, "reward": 1.2040843963623047, "reward_std": 0.23600390553474426, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5194628834724426, "rewards/format_reward_step": 0.9921875, "step": 90 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.391664971796331e-07, "aux_brier/mean_group_std": 0.044456516205821715, "aux_brier/mean_r": 0.9723564386852445, "aux_brier/n_active_tok": 191.625, "aux_brier/n_groups": 10.34375, "aux_brier/n_step_records": 47.90625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5052134245682632, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.5175590551181102, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.007134571521668312, "calib/mean_conf": 0.10039370078740158, "calib/mu_c": 0.09761290322580644, "calib/mu_w": 0.10474747474747476, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.003858267716535433, "calib/std_conf": 0.07329050786423223, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2952.0, "completions/max_terminated_length": 2952.0, "completions/mean_length": 488.28125, "completions/mean_terminated_length": 488.28125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.09706666666666666, "grad_norm": 0.03312379866838455, "learning_rate": 3.0277777777777776e-06, "loss": 0.0216, "num_tokens": 19022335.0, "reward": 1.221062421798706, "reward_std": 0.21122238039970398, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.4858124852180481, "rewards/format_reward_step": 0.98828125, "step": 91 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.314703541818403e-08, "aux_brier/mean_group_std": 0.03511055353663211, "aux_brier/mean_r": 0.9748766047694974, "aux_brier/n_active_tok": 193.875, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 48.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5558313434711575, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.46970588235294114, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.004519605378911676, "calib/mean_conf": 0.11029411764705883, "calib/mu_c": 0.11222602739726029, "calib/mu_w": 0.10770642201834861, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.003725490196078431, "calib/std_conf": 0.07992650892002617, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1367.0, "completions/max_terminated_length": 1367.0, "completions/mean_length": 434.4296875, "completions/mean_terminated_length": 436.13336181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.09813333333333334, "grad_norm": 0.016238447278738022, "learning_rate": 3e-06, "loss": 0.0156, "num_tokens": 19240269.0, "reward": 1.1943002939224243, "reward_std": 0.22463177144527435, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.5272014737129211, "rewards/format_reward_step": 0.984375, "step": 92 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.560714980174964e-07, "aux_brier/mean_group_std": 0.0628678582057803, "aux_brier/mean_r": 0.9638688028292322, "aux_brier/n_active_tok": 221.5, "aux_brier/n_groups": 13.375, "aux_brier/n_step_records": 55.375, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.46636302294197024, "calib/avg_num_step_conf": 7.08203125, "calib/ece": 0.4272868852459016, "calib/final_conf_rate": 0.953125, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.01464858299595144, "calib/mean_conf": 0.11509016393442623, "calib/mu_c": 0.10824615384615384, "calib/mu_w": 0.12289473684210528, "calib/nonempty_final_conf_rate": 0.953125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.004795081967213115, "calib/std_conf": 0.07211676598433057, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2892.0, "completions/max_terminated_length": 2892.0, "completions/mean_length": 541.20703125, "completions/mean_terminated_length": 545.468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.0992, "grad_norm": 0.017522595822811127, "learning_rate": 2.9722222222222225e-06, "loss": 0.0622, "num_tokens": 19484594.0, "reward": 1.1170116662979126, "reward_std": 0.2905193567276001, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5305466055870056, "rewards/format_reward_step": 0.9453125, "step": 93 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.603671444671555e-07, "aux_brier/mean_group_std": 0.06170459423537104, "aux_brier/mean_r": 0.9574224276448977, "aux_brier/n_active_tok": 192.625, "aux_brier/n_groups": 10.6875, "aux_brier/n_step_records": 48.15625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6089935064935066, "calib/avg_num_step_conf": 6.2109375, "calib/ece": 0.4455599999999999, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.020103896103896082, "calib/mean_conf": 0.11444000000000001, "calib/mu_c": 0.12328571428571428, "calib/mu_w": 0.1031818181818182, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.05982546614945846, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2384.0, "completions/max_terminated_length": 2384.0, "completions/mean_length": 460.0390625, "completions/mean_terminated_length": 463.6614074707031, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.10026666666666667, "grad_norm": 0.049883171916007996, "learning_rate": 2.944444444444445e-06, "loss": -0.0079, "num_tokens": 19711044.0, "reward": 1.169290542602539, "reward_std": 0.2328319549560547, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5443500280380249, "rewards/format_reward_step": 0.97265625, "step": 94 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.062849758204365e-08, "aux_brier/mean_group_std": 0.08511857969070105, "aux_brier/mean_r": 0.941223628726528, "aux_brier/n_active_tok": 218.625, "aux_brier/n_groups": 11.625, "aux_brier/n_step_records": 54.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5710749646393212, "calib/avg_num_step_conf": 6.859375, "calib/ece": 0.4831568627450981, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.004575028931464581, "calib/mean_conf": 0.12994117647058825, "calib/mu_c": 0.13175324675324676, "calib/mu_w": 0.12717821782178218, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.004588235294117648, "calib/std_conf": 0.08687661111829946, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2789.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 491.24609375, "completions/mean_terminated_length": 491.24609375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.10133333333333333, "grad_norm": 0.007514007855206728, "learning_rate": 2.916666666666667e-06, "loss": 0.0427, "num_tokens": 19942931.0, "reward": 1.2268400192260742, "reward_std": 0.25178098678588867, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.5245475769042969, "rewards/format_reward_step": 0.98828125, "step": 95 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.471258988030357e-07, "aux_brier/mean_group_std": 0.07623959324164836, "aux_brier/mean_r": 0.9404822735834342, "aux_brier/n_active_tok": 193.25, "aux_brier/n_groups": 10.21875, "aux_brier/n_step_records": 48.3125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5431751976695797, "calib/avg_num_step_conf": 6.0859375, "calib/ece": 0.5144581673306773, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0029205160216396187, "calib/mean_conf": 0.13470517928286854, "calib/mu_c": 0.13574074074074075, "calib/mu_w": 0.13282022471910113, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0018725099601593625, "calib/std_conf": 0.05681746067642289, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2734.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 446.1953125, "completions/mean_terminated_length": 449.7086486816406, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1024, "grad_norm": 0.04604311287403107, "learning_rate": 2.888888888888889e-06, "loss": -0.0022, "num_tokens": 20162973.0, "reward": 1.2425113916397095, "reward_std": 0.2629367709159851, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.49348321557044983, "rewards/format_reward_step": 0.97265625, "step": 96 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.3304468677330945e-07, "aux_brier/mean_group_std": 0.055901029589494125, "aux_brier/mean_r": 0.9592811271432996, "aux_brier/n_active_tok": 203.125, "aux_brier/n_groups": 12.25, "aux_brier/n_step_records": 50.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4733009708737865, "calib/avg_num_step_conf": 6.3515625, "calib/ece": 0.4533665338645418, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0031376279191813183, "calib/mean_conf": 0.13675298804780878, "calib/mu_c": 0.13804054054054055, "calib/mu_w": 0.13490291262135923, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00023904382470119542, "calib/std_conf": 0.0603798957748416, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 465.5859375, "completions/mean_terminated_length": 465.5859375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.10346666666666667, "grad_norm": 0.0215875543653965, "learning_rate": 2.861111111111111e-06, "loss": 0.067, "num_tokens": 20387235.0, "reward": 1.198427438735962, "reward_std": 0.2698313295841217, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5358976125717163, "rewards/format_reward_step": 0.97265625, "step": 97 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.648226779481069e-07, "aux_brier/mean_group_std": 0.08896616020067474, "aux_brier/mean_r": 0.927187821130504, "aux_brier/n_active_tok": 195.25, "aux_brier/n_groups": 11.53125, "aux_brier/n_step_records": 48.8125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5733445945945945, "calib/avg_num_step_conf": 6.1171875, "calib/ece": 0.44758064516129037, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013816216216216204, "calib/mean_conf": 0.15564516129032258, "calib/mu_c": 0.1612162162162162, "calib/mu_w": 0.1474, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0032258064516129032, "calib/std_conf": 0.07882444699665722, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2380.0, "completions/max_terminated_length": 2380.0, "completions/mean_length": 488.26953125, "completions/mean_terminated_length": 494.0592956542969, "completions/min_length": 0.0, "completions/min_terminated_length": 54.0, "epoch": 0.10453333333333334, "grad_norm": 0.03524274006485939, "learning_rate": 2.8333333333333335e-06, "loss": 0.0089, "num_tokens": 20618416.0, "reward": 1.1964812278747559, "reward_std": 0.3142014145851135, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.543737530708313, "rewards/format_reward_step": 0.96484375, "step": 98 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.33231927501987e-07, "aux_brier/mean_group_std": 0.044237613843478026, "aux_brier/mean_r": 0.9645182520954001, "aux_brier/n_active_tok": 204.875, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 51.21875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5962581722075393, "calib/avg_num_step_conf": 6.4140625, "calib/ece": 0.21859437751004018, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004016064257028112, "calib/gap": 0.02309152872444009, "calib/mean_conf": 0.15457831325301202, "calib/mu_c": 0.16923076923076924, "calib/mu_w": 0.14613924050632915, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0038554216867469873, "calib/std_conf": 0.08163934604623983, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3017.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 565.86328125, "completions/mean_terminated_length": 568.0823974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1056, "grad_norm": 0.021649803966283798, "learning_rate": 2.805555555555556e-06, "loss": 0.0905, "num_tokens": 20869077.0, "reward": 1.0164519548416138, "reward_std": 0.26768240332603455, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7064328193664551, "rewards/format_reward_step": 0.96875, "step": 99 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.9755153549821935e-07, "aux_brier/mean_group_std": 0.070615693003584, "aux_brier/mean_r": 0.9427640854618798, "aux_brier/n_active_tok": 192.125, "aux_brier/n_groups": 10.21875, "aux_brier/n_step_records": 48.03125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6237351378699721, "calib/avg_num_step_conf": 6.00390625, "calib/ece": 0.32043650793650796, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.025935997976220565, "calib/mean_conf": 0.15654761904761905, "calib/mu_c": 0.17033898305084744, "calib/mu_w": 0.14440298507462687, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004365079365079365, "calib/std_conf": 0.08157906936109978, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2387.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 482.6171875, "completions/mean_terminated_length": 484.50982666015625, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.10666666666666667, "grad_norm": 0.02115054987370968, "learning_rate": 2.7777777777777783e-06, "loss": 0.0617, "num_tokens": 21100035.0, "reward": 1.1155734062194824, "reward_std": 0.22176724672317505, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6497933864593506, "rewards/format_reward_step": 0.984375, "step": 100 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.6289987359030533e-07, "aux_brier/mean_group_std": 0.05416497296840116, "aux_brier/mean_r": 0.9574554088139146, "aux_brier/n_active_tok": 212.375, "aux_brier/n_groups": 12.625, "aux_brier/n_step_records": 53.09375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5762396561678107, "calib/avg_num_step_conf": 6.640625, "calib/ece": 0.32036, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.006435948425171617, "calib/mean_conf": 0.16772, "calib/mu_c": 0.17109243697478993, "calib/mu_w": 0.1646564885496183, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.006040000000000001, "calib/std_conf": 0.09132798913805121, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2550.0, "completions/max_terminated_length": 2550.0, "completions/mean_length": 549.37890625, "completions/mean_terminated_length": 551.5333862304688, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.10773333333333333, "grad_norm": 0.013132737018167973, "learning_rate": 2.7500000000000004e-06, "loss": 0.0397, "num_tokens": 21347668.0, "reward": 1.1007204055786133, "reward_std": 0.2732691764831543, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6216316223144531, "rewards/format_reward_step": 0.9609375, "step": 101 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1049290096765141e-07, "aux_brier/mean_group_std": 0.07210684931674305, "aux_brier/mean_r": 0.9431077216718703, "aux_brier/n_active_tok": 187.375, "aux_brier/n_groups": 10.46875, "aux_brier/n_step_records": 46.84375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5621585609593605, "calib/avg_num_step_conf": 5.85546875, "calib/ece": 0.4517391304347827, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.024656895403064594, "calib/mean_conf": 0.1727667984189723, "calib/mu_c": 0.1820253164556962, "calib/mu_w": 0.1573684210526316, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.09603736182358646, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3004.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 435.859375, "completions/mean_terminated_length": 435.859375, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.1088, "grad_norm": 0.0939481109380722, "learning_rate": 2.7222222222222224e-06, "loss": 0.0405, "num_tokens": 21565944.0, "reward": 1.2448434829711914, "reward_std": 0.2115369737148285, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.5496863126754761, "rewards/format_reward_step": 0.98046875, "step": 102 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.943412219657553e-07, "aux_brier/mean_group_std": 0.05980401389252265, "aux_brier/mean_r": 0.9504609641694534, "aux_brier/n_active_tok": 186.0, "aux_brier/n_groups": 11.5, "aux_brier/n_step_records": 46.5, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4941273048749684, "calib/avg_num_step_conf": 5.8359375, "calib/ece": 0.41670588235294126, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.013238822935084577, "calib/mean_conf": 0.18156862745098037, "calib/mu_c": 0.17601351351351355, "calib/mu_w": 0.18925233644859812, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008941176470588235, "calib/std_conf": 0.09257900810255727, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2571.0, "completions/max_terminated_length": 2571.0, "completions/mean_length": 523.44140625, "completions/mean_terminated_length": 523.44140625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.10986666666666667, "grad_norm": 0.029213661327958107, "learning_rate": 2.6944444444444444e-06, "loss": 0.0579, "num_tokens": 21804497.0, "reward": 1.2157342433929443, "reward_std": 0.18733951449394226, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.5738745927810669, "rewards/format_reward_step": 0.98828125, "step": 103 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1070041030514233e-06, "aux_brier/mean_group_std": 0.0525261505534932, "aux_brier/mean_r": 0.9526624960350326, "aux_brier/n_active_tok": 187.0, "aux_brier/n_groups": 10.40625, "aux_brier/n_step_records": 46.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5373905373905374, "calib/avg_num_step_conf": 5.84765625, "calib/ece": 0.251751968503937, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.008606753606753587, "calib/mean_conf": 0.19533464566929135, "calib/mu_c": 0.2001801801801802, "calib/mu_w": 0.1915734265734266, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005039370078740158, "calib/std_conf": 0.10022409055330288, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2127.0, "completions/max_terminated_length": 2127.0, "completions/mean_length": 462.359375, "completions/mean_terminated_length": 462.359375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.11093333333333333, "grad_norm": 0.01652079075574875, "learning_rate": 2.666666666666667e-06, "loss": 0.0345, "num_tokens": 22029541.0, "reward": 1.10176682472229, "reward_std": 0.20520168542861938, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.680504560470581, "rewards/format_reward_step": 0.98828125, "step": 104 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.19404509946186e-07, "aux_brier/mean_group_std": 0.05946307159458818, "aux_brier/mean_r": 0.9456807016864452, "aux_brier/n_active_tok": 201.875, "aux_brier/n_groups": 11.84375, "aux_brier/n_step_records": 50.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.42186904005085823, "calib/avg_num_step_conf": 6.30859375, "calib/ece": 0.35533864541832677, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.04039478703115068, "calib/mean_conf": 0.21478087649402391, "calib/mu_c": 0.19530769230769232, "calib/mu_w": 0.235702479338843, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.026095617529880474, "calib/std_conf": 0.13522097818409848, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2863.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 519.875, "completions/mean_terminated_length": 521.9137573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.112, "grad_norm": 0.0219018142670393, "learning_rate": 2.6388888888888893e-06, "loss": 0.0899, "num_tokens": 22268389.0, "reward": 1.1437761783599854, "reward_std": 0.31400659680366516, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.5985422134399414, "rewards/format_reward_step": 0.96484375, "step": 105 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.319968316459423e-07, "aux_brier/mean_group_std": 0.05996420179630981, "aux_brier/mean_r": 0.9467734982637507, "aux_brier/n_active_tok": 183.375, "aux_brier/n_groups": 9.6875, "aux_brier/n_step_records": 45.84375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5989866534849234, "calib/avg_num_step_conf": 5.73046875, "calib/ece": 0.33345098039215687, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.030189075630252166, "calib/mean_conf": 0.1998823529411765, "calib/mu_c": 0.21397058823529413, "calib/mu_w": 0.18378151260504197, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.090256637811152, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2980.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 444.140625, "completions/mean_terminated_length": 444.140625, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.11306666666666666, "grad_norm": 0.041677430272102356, "learning_rate": 2.6111111111111113e-06, "loss": 0.0461, "num_tokens": 22486673.0, "reward": 1.1903657913208008, "reward_std": 0.2239205539226532, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6442761421203613, "rewards/format_reward_step": 0.99609375, "step": 106 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1472411331481247e-06, "aux_brier/mean_group_std": 0.06170651422332253, "aux_brier/mean_r": 0.9504726472874938, "aux_brier/n_active_tok": 210.0, "aux_brier/n_groups": 11.3125, "aux_brier/n_step_records": 52.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5769972451790634, "calib/avg_num_step_conf": 6.79296875, "calib/ece": 0.4678260869565216, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.013712121212121203, "calib/mean_conf": 0.19280632411067194, "calib/mu_c": 0.19757575757575757, "calib/mu_w": 0.18386363636363637, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0042292490118577076, "calib/std_conf": 0.09102637219132664, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1121.0, "completions/max_terminated_length": 1121.0, "completions/mean_length": 424.3984375, "completions/mean_terminated_length": 429.43084716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.11413333333333334, "grad_norm": 0.027883950620889664, "learning_rate": 2.5833333333333337e-06, "loss": -0.0391, "num_tokens": 22699935.0, "reward": 1.2770494222640991, "reward_std": 0.21880196034908295, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.5535101890563965, "rewards/format_reward_step": 0.98828125, "step": 107 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.285217771795359e-07, "aux_brier/mean_group_std": 0.07730223343893546, "aux_brier/mean_r": 0.9229285104785447, "aux_brier/n_active_tok": 200.0, "aux_brier/n_groups": 10.71875, "aux_brier/n_step_records": 50.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6192843691148776, "calib/avg_num_step_conf": 6.25390625, "calib/ece": 0.47761904761904755, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.029066666666666685, "calib/mean_conf": 0.23801587301587304, "calib/mu_c": 0.2466666666666667, "calib/mu_w": 0.21760000000000002, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.006626984126984126, "calib/std_conf": 0.1354329907597946, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2517.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 486.671875, "completions/mean_terminated_length": 488.5804138183594, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.1152, "grad_norm": 0.009353240951895714, "learning_rate": 2.5555555555555557e-06, "loss": 0.0334, "num_tokens": 22927755.0, "reward": 1.3246445655822754, "reward_std": 0.25123316049575806, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.5563905835151672, "rewards/format_reward_step": 0.98046875, "step": 108 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.0594688352491026e-07, "aux_brier/mean_group_std": 0.052904228959484, "aux_brier/mean_r": 0.9500203047601241, "aux_brier/n_active_tok": 221.375, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 55.34375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5782130056323604, "calib/avg_num_step_conf": 6.98046875, "calib/ece": 0.28539999999999993, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.004, "calib/gap": 0.027186379928315463, "calib/mean_conf": 0.22596, "calib/mu_c": 0.23944444444444446, "calib/mu_w": 0.212258064516129, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.00368, "calib/std_conf": 0.1264123348411855, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2890.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 533.0078125, "completions/mean_terminated_length": 533.0078125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.11626666666666667, "grad_norm": 0.02860928140580654, "learning_rate": 2.5277777777777778e-06, "loss": 0.0865, "num_tokens": 23168805.0, "reward": 1.1391407251358032, "reward_std": 0.20494352281093597, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6503129005432129, "rewards/format_reward_step": 0.96875, "step": 109 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.665521534721108e-08, "aux_brier/mean_group_std": 0.06194897401406562, "aux_brier/mean_r": 0.9388521722173031, "aux_brier/n_active_tok": 170.625, "aux_brier/n_groups": 9.65625, "aux_brier/n_step_records": 42.65625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.550551948051948, "calib/avg_num_step_conf": 5.5078125, "calib/ece": 0.33968, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003688311688311685, "calib/mean_conf": 0.24552000000000004, "calib/mu_c": 0.2471428571428571, "calib/mu_w": 0.24345454545454542, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.0126, "calib/std_conf": 0.1331943302096602, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2438.0, "completions/max_terminated_length": 2438.0, "completions/mean_length": 452.08984375, "completions/mean_terminated_length": 455.64959716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 106.0, "epoch": 0.11733333333333333, "grad_norm": 0.02334379218518734, "learning_rate": 2.5e-06, "loss": 0.0028, "num_tokens": 23389460.0, "reward": 1.1788196563720703, "reward_std": 0.3163892328739166, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6137160062789917, "rewards/format_reward_step": 0.95703125, "step": 110 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.735419982995534e-07, "aux_brier/mean_group_std": 0.09109005346124643, "aux_brier/mean_r": 0.9050274539363512, "aux_brier/n_active_tok": 204.5, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 51.125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5725766103814884, "calib/avg_num_step_conf": 6.54296875, "calib/ece": 0.252800790513834, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.03333909943714819, "calib/mean_conf": 0.30103320158102764, "calib/mu_c": 0.31724153846153846, "calib/mu_w": 0.2839024390243903, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.020000000000000007, "calib/std_conf": 0.1665680062792277, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2684.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 488.953125, "completions/mean_terminated_length": 492.80316162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.1184, "grad_norm": 0.013404710218310356, "learning_rate": 2.4722222222222226e-06, "loss": 0.0219, "num_tokens": 23622040.0, "reward": 1.1733752489089966, "reward_std": 0.22607816755771637, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6856883764266968, "rewards/format_reward_step": 0.98828125, "step": 111 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.460265378205321e-08, "aux_brier/mean_group_std": 0.10395217381270051, "aux_brier/mean_r": 0.8996797146078752, "aux_brier/n_active_tok": 182.625, "aux_brier/n_groups": 11.21875, "aux_brier/n_step_records": 45.65625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.621199619168518, "calib/avg_num_step_conf": 5.7265625, "calib/ece": 0.2562460317460318, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.06215055537924474, "calib/mean_conf": 0.3052142857142857, "calib/mu_c": 0.33357664233576645, "calib/mu_w": 0.2714260869565217, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.008904761904761905, "calib/std_conf": 0.16373401325780074, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2516.0, "completions/max_terminated_length": 2516.0, "completions/mean_length": 501.3046875, "completions/mean_terminated_length": 507.2490234375, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.11946666666666667, "grad_norm": 0.041241325438022614, "learning_rate": 2.4444444444444447e-06, "loss": -0.0257, "num_tokens": 23858294.0, "reward": 1.1883974075317383, "reward_std": 0.25365743041038513, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6754645705223083, "rewards/format_reward_step": 0.96875, "step": 112 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2923106848528043e-08, "aux_brier/mean_group_std": 0.07309923488199442, "aux_brier/mean_r": 0.9074925321416778, "aux_brier/n_active_tok": 186.125, "aux_brier/n_groups": 10.34375, "aux_brier/n_step_records": 46.53125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5887941919191919, "calib/avg_num_step_conf": 5.9140625, "calib/ece": 0.25413385826771656, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.04289393939393937, "calib/mean_conf": 0.3405905511811024, "calib/mu_c": 0.35916666666666663, "calib/mu_w": 0.31627272727272726, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.013897637795275582, "calib/std_conf": 0.16184725249155846, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1360.0, "completions/max_terminated_length": 1360.0, "completions/mean_length": 413.90625, "completions/mean_terminated_length": 415.5294494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 142.0, "epoch": 0.12053333333333334, "grad_norm": 0.01859516091644764, "learning_rate": 2.4166666666666667e-06, "loss": -0.0, "num_tokens": 24069454.0, "reward": 1.2298774719238281, "reward_std": 0.24649205803871155, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6851351261138916, "rewards/format_reward_step": 0.984375, "step": 113 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.21536945852219e-08, "aux_brier/mean_group_std": 0.08133118644096639, "aux_brier/mean_r": 0.9042530015510433, "aux_brier/n_active_tok": 189.625, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 47.40625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5784134712923515, "calib/avg_num_step_conf": 5.984375, "calib/ece": 0.3130952380952381, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.003968253968253968, "calib/gap": 0.03976871576384672, "calib/mean_conf": 0.3459523809523809, "calib/mu_c": 0.3606289308176101, "calib/mu_w": 0.3208602150537634, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.014047619047619047, "calib/std_conf": 0.18530189255164115, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2751.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 449.04296875, "completions/mean_terminated_length": 450.803955078125, "completions/min_length": 0.0, "completions/min_terminated_length": 135.0, "epoch": 0.1216, "grad_norm": 0.0324382446706295, "learning_rate": 2.388888888888889e-06, "loss": 0.0209, "num_tokens": 24289433.0, "reward": 1.2734736204147339, "reward_std": 0.26929062604904175, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6563944816589355, "rewards/format_reward_step": 0.9765625, "step": 114 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.4504427819916899e-07, "aux_brier/mean_group_std": 0.0900736247521059, "aux_brier/mean_r": 0.8803260767609933, "aux_brier/n_active_tok": 182.875, "aux_brier/n_groups": 10.3125, "aux_brier/n_step_records": 45.71875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5924377554812337, "calib/avg_num_step_conf": 5.71484375, "calib/ece": 0.17807843137254903, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.00392156862745098, "calib/gap": 0.05347082868822001, "calib/mean_conf": 0.40184313725490195, "calib/mu_c": 0.42637681159420293, "calib/mu_w": 0.3729059829059829, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.01937254901960784, "calib/std_conf": 0.18321993347212573, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1456.0, "completions/max_terminated_length": 1456.0, "completions/mean_length": 430.4453125, "completions/mean_terminated_length": 432.13336181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 101.0, "epoch": 0.12266666666666666, "grad_norm": 0.03782593458890915, "learning_rate": 2.361111111111111e-06, "loss": 0.0247, "num_tokens": 24504891.0, "reward": 1.2016677856445312, "reward_std": 0.31003236770629883, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7051090002059937, "rewards/format_reward_step": 0.97265625, "step": 115 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.035065170882035e-09, "aux_brier/mean_group_std": 0.08815182737458258, "aux_brier/mean_r": 0.8630262822311691, "aux_brier/n_active_tok": 192.0, "aux_brier/n_groups": 11.75, "aux_brier/n_step_records": 48.0, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5845454545454546, "calib/avg_num_step_conf": 6.03515625, "calib/ece": 0.2103921568627451, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.03137254901960784, "calib/gap": 0.06291536050156743, "calib/mean_conf": 0.39541176470588235, "calib/mu_c": 0.4225517241379311, "calib/mu_w": 0.35963636363636364, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.01858823529411766, "calib/std_conf": 0.19378512220261362, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1306.0, "completions/max_terminated_length": 1306.0, "completions/mean_length": 492.94140625, "completions/mean_terminated_length": 494.8745422363281, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.12373333333333333, "grad_norm": 0.010954463854432106, "learning_rate": 2.3333333333333336e-06, "loss": -0.0043, "num_tokens": 24735604.0, "reward": 1.2322262525558472, "reward_std": 0.2654731869697571, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.7023426294326782, "rewards/format_reward_step": 0.98046875, "step": 116 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.538974083371897e-09, "aux_brier/mean_group_std": 0.09602579113099918, "aux_brier/mean_r": 0.8550165856812552, "aux_brier/n_active_tok": 202.375, "aux_brier/n_groups": 12.46875, "aux_brier/n_step_records": 50.59375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5607889979338843, "calib/avg_num_step_conf": 6.35546875, "calib/ece": 0.15590361445783132, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.020080321285140562, "calib/gap": 0.03129713326446287, "calib/mean_conf": 0.44763052208835347, "calib/mu_c": 0.4637190082644629, "calib/mu_w": 0.432421875, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.984375, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.058795180722891555, "calib/std_conf": 0.19209583878168068, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1958.0, "completions/max_terminated_length": 1958.0, "completions/mean_length": 473.125, "completions/mean_terminated_length": 474.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.1248, "grad_norm": 0.03597588464617729, "learning_rate": 2.305555555555556e-06, "loss": 0.03, "num_tokens": 24963324.0, "reward": 1.1285216808319092, "reward_std": 0.29047146439552307, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.7015870809555054, "rewards/format_reward_step": 0.9609375, "step": 117 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0043079295279922e-08, "aux_brier/mean_group_std": 0.12438194827326207, "aux_brier/mean_r": 0.8426269790154287, "aux_brier/n_active_tok": 213.0, "aux_brier/n_groups": 13.375, "aux_brier/n_step_records": 53.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5115540717465304, "calib/avg_num_step_conf": 6.76953125, "calib/ece": 0.22776209677419354, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.04032258064516129, "calib/gap": -0.0007914375490967185, "calib/mean_conf": 0.425625, "calib/mu_c": 0.42526119402985074, "calib/mu_w": 0.42605263157894746, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.056532258064516124, "calib/std_conf": 0.22282260162370163, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2337.0, "completions/max_terminated_length": 2337.0, "completions/mean_length": 518.23046875, "completions/mean_terminated_length": 522.31103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.12586666666666665, "grad_norm": 0.010118731297552586, "learning_rate": 2.277777777777778e-06, "loss": -0.0166, "num_tokens": 25199999.0, "reward": 1.1688100099563599, "reward_std": 0.24001935124397278, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6596146821975708, "rewards/format_reward_step": 0.9609375, "step": 118 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 9.805239548077438e-08, "aux_brier/mean_group_std": 0.1099807086024779, "aux_brier/mean_r": 0.836042178190435, "aux_brier/n_active_tok": 203.125, "aux_brier/n_groups": 12.71875, "aux_brier/n_step_records": 50.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.606272577996716, "calib/avg_num_step_conf": 6.42578125, "calib/ece": 0.17004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.036, "calib/gap": 0.07514285714285712, "calib/mean_conf": 0.47044, "calib/mu_c": 0.502, "calib/mu_w": 0.4268571428571429, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.030240000000000003, "calib/std_conf": 0.20611115059598303, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2561.0, "completions/max_terminated_length": 2561.0, "completions/mean_length": 521.5, "completions/mean_terminated_length": 521.5, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.12693333333333334, "grad_norm": 0.015509380958974361, "learning_rate": 2.25e-06, "loss": -0.0036, "num_tokens": 25438567.0, "reward": 1.2388975620269775, "reward_std": 0.34047576785087585, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7212152481079102, "rewards/format_reward_step": 0.9765625, "step": 119 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.9051110350387646e-07, "aux_brier/mean_group_std": 0.09323607420919222, "aux_brier/mean_r": 0.849411085418415, "aux_brier/n_active_tok": 185.5, "aux_brier/n_groups": 10.0625, "aux_brier/n_step_records": 46.375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5910734543391947, "calib/avg_num_step_conf": 5.796875, "calib/ece": 0.22195999999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.02, "calib/gap": 0.049817073170731696, "calib/mean_conf": 0.48268, "calib/mu_c": 0.4998170731707317, "calib/mu_w": 0.45, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.98828125, "calib/nonempty_step_conf_rate": 0.9765625, "calib/pce": 0.02432, "calib/std_conf": 0.1776997962857583, "calib/step_conf_rate": 0.9765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2923.0, "completions/max_terminated_length": 2923.0, "completions/mean_length": 472.71875, "completions/mean_terminated_length": 476.4409484863281, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.128, "grad_norm": 0.025851737707853317, "learning_rate": 2.222222222222222e-06, "loss": 0.0502, "num_tokens": 25666271.0, "reward": 1.3009119033813477, "reward_std": 0.2528459429740906, "rewards/accuracy_reward_step": 0.640625, "rewards/final_brier_reward_step": 0.7114601731300354, "rewards/format_reward_step": 0.96484375, "step": 120 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.023877427315135e-07, "aux_brier/mean_group_std": 0.11907712279835875, "aux_brier/mean_r": 0.8038788502801557, "aux_brier/n_active_tok": 192.75, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 48.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6102017510468214, "calib/avg_num_step_conf": 6.1328125, "calib/ece": 0.14126482213438737, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.03162055335968379, "calib/gap": 0.08346402740768938, "calib/mean_conf": 0.515494071146245, "calib/mu_c": 0.5521126760563381, "calib/mu_w": 0.4686486486486487, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.04774703557312254, "calib/std_conf": 0.21272617111651348, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1888.0, "completions/max_terminated_length": 1888.0, "completions/mean_length": 487.54296875, "completions/mean_terminated_length": 489.4549255371094, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.12906666666666666, "grad_norm": 0.025088908150792122, "learning_rate": 2.1944444444444445e-06, "loss": 0.0059, "num_tokens": 25896138.0, "reward": 1.2180376052856445, "reward_std": 0.34152498841285706, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.7237125039100647, "rewards/format_reward_step": 0.96484375, "step": 121 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.348943218381642e-09, "aux_brier/mean_group_std": 0.10521925750729397, "aux_brier/mean_r": 0.8139436443924074, "aux_brier/n_active_tok": 196.375, "aux_brier/n_groups": 11.09375, "aux_brier/n_step_records": 49.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6766013071895425, "calib/avg_num_step_conf": 6.23828125, "calib/ece": 0.14813492063492065, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.051587301587301584, "calib/gap": 0.13165882352941177, "calib/mean_conf": 0.5229761904761905, "calib/mu_c": 0.5762666666666667, "calib/mu_w": 0.44460784313725493, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.03793650793650793, "calib/std_conf": 0.21325024290671568, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2812.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 481.2421875, "completions/mean_terminated_length": 483.12945556640625, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.13013333333333332, "grad_norm": 0.016481168568134308, "learning_rate": 2.166666666666667e-06, "loss": 0.0513, "num_tokens": 26126680.0, "reward": 1.2624410390853882, "reward_std": 0.23664981126785278, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.752889096736908, "rewards/format_reward_step": 0.9765625, "step": 122 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.804543759200474e-08, "aux_brier/mean_group_std": 0.1333532445532744, "aux_brier/mean_r": 0.7819713444984945, "aux_brier/n_active_tok": 215.625, "aux_brier/n_groups": 14.21875, "aux_brier/n_step_records": 53.90625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5212323588709679, "calib/avg_num_step_conf": 6.87109375, "calib/ece": 0.15730158730158728, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": 0.00902721774193549, "calib/mean_conf": 0.5392857142857141, "calib/mu_c": 0.5438709677419354, "calib/mu_w": 0.53484375, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.10226190476190472, "calib/std_conf": 0.21328562923269867, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2540.0, "completions/max_terminated_length": 2540.0, "completions/mean_length": 534.5625, "completions/mean_terminated_length": 536.6588745117188, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.1312, "grad_norm": 0.011090341955423355, "learning_rate": 2.138888888888889e-06, "loss": 0.0172, "num_tokens": 26368816.0, "reward": 1.1451952457427979, "reward_std": 0.34802937507629395, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.6901558637619019, "rewards/format_reward_step": 0.9765625, "step": 123 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.804361607493092e-08, "aux_brier/mean_group_std": 0.12186208698547026, "aux_brier/mean_r": 0.7878657593461644, "aux_brier/n_active_tok": 207.125, "aux_brier/n_groups": 11.5625, "aux_brier/n_step_records": 51.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5049013447279125, "calib/avg_num_step_conf": 6.58203125, "calib/ece": 0.17185294117647062, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.09411764705882353, "calib/gap": -0.0019676071383688543, "calib/mean_conf": 0.5660294117647058, "calib/mu_c": 0.5651883561643835, "calib/mu_w": 0.5671559633027523, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.08266666666666664, "calib/std_conf": 0.22180707064006883, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1131.0, "completions/max_terminated_length": 1131.0, "completions/mean_length": 480.09375, "completions/mean_terminated_length": 481.97650146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 153.0, "epoch": 0.13226666666666667, "grad_norm": 0.03438295051455498, "learning_rate": 2.1111111111111114e-06, "loss": -0.0223, "num_tokens": 26598536.0, "reward": 1.2419825792312622, "reward_std": 0.27619826793670654, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.7023051977157593, "rewards/format_reward_step": 0.9921875, "step": 124 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.631051929246624e-09, "aux_brier/mean_group_std": 0.11003318241392289, "aux_brier/mean_r": 0.7682655333870341, "aux_brier/n_active_tok": 201.875, "aux_brier/n_groups": 12.65625, "aux_brier/n_step_records": 50.46875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5695820630081301, "calib/avg_num_step_conf": 6.64453125, "calib/ece": 0.1550572111553785, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.09163346613545817, "calib/gap": 0.05913990853658524, "calib/mean_conf": 0.5556946613545816, "calib/mu_c": 0.5858536585365853, "calib/mu_w": 0.5267137500000001, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.11035601593625499, "calib/std_conf": 0.22917609594105082, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2657.0, "completions/max_terminated_length": 2657.0, "completions/mean_length": 546.6875, "completions/mean_terminated_length": 548.8314208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.13333333333333333, "grad_norm": 0.09529190510511398, "learning_rate": 2.0833333333333334e-06, "loss": 0.0526, "num_tokens": 26843296.0, "reward": 1.1424918174743652, "reward_std": 0.33072489500045776, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.7027797698974609, "rewards/format_reward_step": 0.97265625, "step": 125 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.0692540539379856e-07, "aux_brier/mean_group_std": 0.13873352559777485, "aux_brier/mean_r": 0.7887880291169385, "aux_brier/n_active_tok": 229.0, "aux_brier/n_groups": 13.25, "aux_brier/n_step_records": 57.25, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6305021502656211, "calib/avg_num_step_conf": 7.19140625, "calib/ece": 0.12091269841269836, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07539682539682539, "calib/gap": 0.10361624082974963, "calib/mean_conf": 0.5854365079365079, "calib/mu_c": 0.6339552238805971, "calib/mu_w": 0.5303389830508475, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.08730158730158726, "calib/std_conf": 0.22710087948051053, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2402.0, "completions/max_terminated_length": 2402.0, "completions/mean_length": 529.53515625, "completions/mean_terminated_length": 533.7047119140625, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.1344, "grad_norm": 0.2039298713207245, "learning_rate": 2.0555555555555555e-06, "loss": 0.0078, "num_tokens": 27084321.0, "reward": 1.195006012916565, "reward_std": 0.30686506628990173, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.7331492900848389, "rewards/format_reward_step": 0.9765625, "step": 126 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.2099292080211015e-08, "aux_brier/mean_group_std": 0.132268770512582, "aux_brier/mean_r": 0.7519659990795201, "aux_brier/n_active_tok": 225.75, "aux_brier/n_groups": 13.09375, "aux_brier/n_step_records": 56.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5826103720840563, "calib/avg_num_step_conf": 7.0703125, "calib/ece": 0.223072, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.112, "calib/gap": 0.07049778291883557, "calib/mean_conf": 0.5890080000000001, "calib/mu_c": 0.6265128205128204, "calib/mu_w": 0.5560150375939849, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17204000000000003, "calib/std_conf": 0.2603389942670901, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2733.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 503.84765625, "completions/mean_terminated_length": 505.82354736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 123.0, "epoch": 0.13546666666666668, "grad_norm": 0.025640472769737244, "learning_rate": 2.027777777777778e-06, "loss": 0.0199, "num_tokens": 27316978.0, "reward": 1.1171164512634277, "reward_std": 0.269479900598526, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.68721604347229, "rewards/format_reward_step": 0.9765625, "step": 127 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.8550026526241723e-07, "aux_brier/mean_group_std": 0.11626457616988539, "aux_brier/mean_r": 0.7833039722465267, "aux_brier/n_active_tok": 200.375, "aux_brier/n_groups": 10.96875, "aux_brier/n_step_records": 50.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5323210685483871, "calib/avg_num_step_conf": 6.26953125, "calib/ece": 0.18956349206349205, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07142857142857142, "calib/gap": 0.03445564516129018, "calib/mean_conf": 0.567420634920635, "calib/mu_c": 0.5843749999999999, "calib/mu_w": 0.5499193548387097, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.12452380952380951, "calib/std_conf": 0.2308973472585284, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2899.0, "completions/max_terminated_length": 2899.0, "completions/mean_length": 520.06640625, "completions/mean_terminated_length": 522.1058959960938, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.13653333333333334, "grad_norm": 0.040990106761455536, "learning_rate": 2.0000000000000003e-06, "loss": 0.0555, "num_tokens": 27556779.0, "reward": 1.1615229845046997, "reward_std": 0.37365710735321045, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6929668188095093, "rewards/format_reward_step": 0.9765625, "step": 128 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.1032372782568984e-09, "aux_brier/mean_group_std": 0.14496101095711636, "aux_brier/mean_r": 0.7684938475408516, "aux_brier/n_active_tok": 211.0, "aux_brier/n_groups": 11.15625, "aux_brier/n_step_records": 52.75, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5871559633027522, "calib/avg_num_step_conf": 6.85546875, "calib/ece": 0.11456692913385826, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0984251968503937, "calib/gap": 0.061095855741853944, "calib/mean_conf": 0.6015748031496063, "calib/mu_c": 0.6277931034482759, "calib/mu_w": 0.5666972477064219, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.07263779527559053, "calib/std_conf": 0.20763355514797324, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1865.0, "completions/max_terminated_length": 1865.0, "completions/mean_length": 472.22265625, "completions/mean_terminated_length": 472.22265625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.1376, "grad_norm": 0.3489000201225281, "learning_rate": 1.9722222222222224e-06, "loss": 0.0049, "num_tokens": 27780052.0, "reward": 1.2439736127853394, "reward_std": 0.2687609791755676, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.733707070350647, "rewards/format_reward_step": 0.98828125, "step": 129 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.8302100496535587e-08, "aux_brier/mean_group_std": 0.14008769469974838, "aux_brier/mean_r": 0.7618561898006087, "aux_brier/n_active_tok": 212.25, "aux_brier/n_groups": 11.9375, "aux_brier/n_step_records": 53.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5510839020473705, "calib/avg_num_step_conf": 6.765625, "calib/ece": 0.1560079051383399, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.06719367588932806, "calib/gap": 0.04914625986886112, "calib/mean_conf": 0.5862055335968379, "calib/mu_c": 0.6044654088050314, "calib/mu_w": 0.5553191489361703, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.05687747035573122, "calib/std_conf": 0.2347815174113923, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3006.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 480.3828125, "completions/mean_terminated_length": 482.2666931152344, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.13866666666666666, "grad_norm": 0.06654537469148636, "learning_rate": 1.944444444444445e-06, "loss": -0.025, "num_tokens": 28008318.0, "reward": 1.2934925556182861, "reward_std": 0.24962244927883148, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.7208452820777893, "rewards/format_reward_step": 0.984375, "step": 130 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.407775949788473e-09, "aux_brier/mean_group_std": 0.1155607896182773, "aux_brier/mean_r": 0.7569160221612612, "aux_brier/n_active_tok": 208.75, "aux_brier/n_groups": 11.71875, "aux_brier/n_step_records": 52.1875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5150358773646445, "calib/avg_num_step_conf": 6.69921875, "calib/ece": 0.2462948207171315, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.07171314741035857, "calib/gap": 0.011600130463144231, "calib/mean_conf": 0.6316334661354582, "calib/mu_c": 0.6383809523809524, "calib/mu_w": 0.6267808219178082, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.229800796812749, "calib/std_conf": 0.20572900481422265, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2054.0, "completions/max_terminated_length": 2054.0, "completions/mean_length": 462.64453125, "completions/mean_terminated_length": 466.28741455078125, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.13973333333333332, "grad_norm": 0.03153286501765251, "learning_rate": 1.916666666666667e-06, "loss": 0.0237, "num_tokens": 28232963.0, "reward": 1.0600390434265137, "reward_std": 0.25981175899505615, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.654218316078186, "rewards/format_reward_step": 0.97265625, "step": 131 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -9.654283510476813e-08, "aux_brier/mean_group_std": 0.137355077775322, "aux_brier/mean_r": 0.7200141650152183, "aux_brier/n_active_tok": 232.875, "aux_brier/n_groups": 12.71875, "aux_brier/n_step_records": 58.21875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.451530612244898, "calib/avg_num_step_conf": 7.515625, "calib/ece": 0.20570669291338578, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.16141732283464566, "calib/gap": -0.03189024071166935, "calib/mean_conf": 0.6553169291338582, "calib/mu_c": 0.6430128205128205, "calib/mu_w": 0.6749030612244898, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12342519685039366, "calib/std_conf": 0.22389078940310644, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1447.0, "completions/max_terminated_length": 1447.0, "completions/mean_length": 493.95703125, "completions/mean_terminated_length": 497.8464660644531, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.1408, "grad_norm": 0.02973356284201145, "learning_rate": 1.888888888888889e-06, "loss": -0.0015, "num_tokens": 28465008.0, "reward": 1.275361180305481, "reward_std": 0.3099433183670044, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6873823404312134, "rewards/format_reward_step": 0.98828125, "step": 132 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2209479754998753e-07, "aux_brier/mean_group_std": 0.1348067017372979, "aux_brier/mean_r": 0.7201088907026406, "aux_brier/n_active_tok": 241.125, "aux_brier/n_groups": 12.5, "aux_brier/n_step_records": 60.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5112315270935961, "calib/avg_num_step_conf": 7.85546875, "calib/ece": 0.3036800000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.196, "calib/gap": 0.0005057471264368063, "calib/mean_conf": 0.67104, "calib/mu_c": 0.6713333333333334, "calib/mu_w": 0.6708275862068966, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2773600000000001, "calib/std_conf": 0.22456116850426297, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2548.0, "completions/max_terminated_length": 2548.0, "completions/mean_length": 573.53125, "completions/mean_terminated_length": 580.33203125, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.14186666666666667, "grad_norm": 0.06560198962688446, "learning_rate": 1.8611111111111113e-06, "loss": 0.013, "num_tokens": 28718176.0, "reward": 1.0554680824279785, "reward_std": 0.35876548290252686, "rewards/accuracy_reward_step": 0.41015625, "rewards/final_brier_reward_step": 0.6281226873397827, "rewards/format_reward_step": 0.9765625, "step": 133 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.438481327970976e-08, "aux_brier/mean_group_std": 0.14870094670260808, "aux_brier/mean_r": 0.7461208339567589, "aux_brier/n_active_tok": 238.0, "aux_brier/n_groups": 13.53125, "aux_brier/n_step_records": 59.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.64296875, "calib/avg_num_step_conf": 7.70703125, "calib/ece": 0.13858498023715418, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.15019762845849802, "calib/gap": 0.12917525000000007, "calib/mean_conf": 0.6332094861660079, "calib/mu_c": 0.69703125, "calib/mu_w": 0.5678559999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.1329328063241107, "calib/std_conf": 0.24064815226933503, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2232.0, "completions/max_terminated_length": 2232.0, "completions/mean_length": 576.75, "completions/mean_terminated_length": 576.75, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 0.14293333333333333, "grad_norm": 0.006441768724471331, "learning_rate": 1.8333333333333333e-06, "loss": 0.0259, "num_tokens": 28974776.0, "reward": 1.174364686012268, "reward_std": 0.37051185965538025, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.728708803653717, "rewards/format_reward_step": 0.984375, "step": 134 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.983773572829863e-08, "aux_brier/mean_group_std": 0.1297264128786359, "aux_brier/mean_r": 0.7291125413853531, "aux_brier/n_active_tok": 224.125, "aux_brier/n_groups": 12.84375, "aux_brier/n_step_records": 56.03125, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.48194886062533127, "calib/avg_num_step_conf": 7.22265625, "calib/ece": 0.2094331983805668, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.12955465587044535, "calib/gap": -0.008500927397986136, "calib/mean_conf": 0.6403643724696357, "calib/mu_c": 0.6365441176470589, "calib/mu_w": 0.645045045045045, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.14959514170040486, "calib/std_conf": 0.22234003216867662, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2825.0, "completions/max_terminated_length": 2825.0, "completions/mean_length": 555.49609375, "completions/mean_terminated_length": 562.0830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.144, "grad_norm": 0.1661306470632553, "learning_rate": 1.8055555555555557e-06, "loss": 0.0306, "num_tokens": 29222863.0, "reward": 1.1777374744415283, "reward_std": 0.31354355812072754, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6640746593475342, "rewards/format_reward_step": 0.9609375, "step": 135 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.93599249562193e-07, "aux_brier/mean_group_std": 0.15232687004347287, "aux_brier/mean_r": 0.7201905545109366, "aux_brier/n_active_tok": 246.5, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 61.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5867794486215538, "calib/avg_num_step_conf": 7.7734375, "calib/ece": 0.24266758893280632, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.15019762845849802, "calib/gap": 0.07194099624060157, "calib/mean_conf": 0.6747237154150199, "calib/mu_c": 0.7125425, "calib/mu_w": 0.6406015037593984, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22154150197628458, "calib/std_conf": 0.2387131814435293, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2365.0, "completions/max_terminated_length": 2365.0, "completions/mean_length": 516.62890625, "completions/mean_terminated_length": 518.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.14506666666666668, "grad_norm": 0.33239132165908813, "learning_rate": 1.777777777777778e-06, "loss": 0.0146, "num_tokens": 29463608.0, "reward": 1.128124475479126, "reward_std": 0.33076632022857666, "rewards/accuracy_reward_step": 0.46875, "rewards/final_brier_reward_step": 0.6765604019165039, "rewards/format_reward_step": 0.98046875, "step": 136 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.2219608319939965e-08, "aux_brier/mean_group_std": 0.1344626168187161, "aux_brier/mean_r": 0.6946798623143181, "aux_brier/n_active_tok": 242.375, "aux_brier/n_groups": 13.25, "aux_brier/n_step_records": 60.59375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.545904761904762, "calib/avg_num_step_conf": 7.61328125, "calib/ece": 0.23932270916334664, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.22709163346613545, "calib/gap": 0.04354603174603189, "calib/mean_conf": 0.7349402390438247, "calib/mu_c": 0.7568000000000001, "calib/mu_w": 0.7132539682539683, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.23812749003984066, "calib/std_conf": 0.1949897750264949, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2646.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 516.80859375, "completions/mean_terminated_length": 518.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.14613333333333334, "grad_norm": 0.22667790949344635, "learning_rate": 1.75e-06, "loss": 0.0237, "num_tokens": 29702895.0, "reward": 1.1392452716827393, "reward_std": 0.30574119091033936, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.65854412317276, "rewards/format_reward_step": 0.97265625, "step": 137 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.4257915937938606e-09, "aux_brier/mean_group_std": 0.12397088904367567, "aux_brier/mean_r": 0.6864214823892756, "aux_brier/n_active_tok": 246.875, "aux_brier/n_groups": 15.3125, "aux_brier/n_step_records": 61.71875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4984061313076506, "calib/avg_num_step_conf": 7.890625, "calib/ece": 0.20135823293172683, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.18875502008032127, "calib/gap": 0.003629584915897821, "calib/mean_conf": 0.7053084337349398, "calib/mu_c": 0.7067223684210525, "calib/mu_w": 0.7030927835051547, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14811244979919677, "calib/std_conf": 0.2043237700837609, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 557.5390625, "completions/mean_terminated_length": 557.5390625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.1472, "grad_norm": 0.009447906166315079, "learning_rate": 1.7222222222222224e-06, "loss": 0.0376, "num_tokens": 29949961.0, "reward": 1.2512726783752441, "reward_std": 0.4080669581890106, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6925908327102661, "rewards/format_reward_step": 0.96875, "step": 138 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.3186143565935282e-08, "aux_brier/mean_group_std": 0.1327250144441744, "aux_brier/mean_r": 0.7265301382303925, "aux_brier/n_active_tok": 218.875, "aux_brier/n_groups": 11.0, "aux_brier/n_step_records": 54.71875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6001890359168243, "calib/avg_num_step_conf": 6.84375, "calib/ece": 0.1675889328063241, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.17786561264822134, "calib/gap": 0.06756211180124216, "calib/mean_conf": 0.678102766798419, "calib/mu_c": 0.7026708074534161, "calib/mu_w": 0.635108695652174, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.10466403162055336, "calib/std_conf": 0.21539089686159638, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 498.16796875, "completions/mean_terminated_length": 498.16796875, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.14826666666666666, "grad_norm": 0.012751229107379913, "learning_rate": 1.6944444444444446e-06, "loss": 0.0311, "num_tokens": 30180588.0, "reward": 1.307733178138733, "reward_std": 0.22187411785125732, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.7387453317642212, "rewards/format_reward_step": 0.98046875, "step": 139 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.7191023127537406e-08, "aux_brier/mean_group_std": 0.13010740070451135, "aux_brier/mean_r": 0.7169416583971014, "aux_brier/n_active_tok": 211.375, "aux_brier/n_groups": 10.9375, "aux_brier/n_step_records": 52.84375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5336568242824695, "calib/avg_num_step_conf": 6.6875, "calib/ece": 0.17704251968503934, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.15354330708661418, "calib/gap": 0.01236474636933027, "calib/mean_conf": 0.6674456692913386, "calib/mu_c": 0.6716808383233533, "calib/mu_w": 0.6593160919540231, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.093503937007874, "calib/std_conf": 0.23274685895895716, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2259.0, "completions/max_terminated_length": 2259.0, "completions/mean_length": 511.34375, "completions/mean_terminated_length": 511.34375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.14933333333333335, "grad_norm": 0.0271713025867939, "learning_rate": 1.6666666666666667e-06, "loss": 0.0081, "num_tokens": 30416508.0, "reward": 1.3259658813476562, "reward_std": 0.276435911655426, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7179260849952698, "rewards/format_reward_step": 0.98828125, "step": 140 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.22849746017717e-08, "aux_brier/mean_group_std": 0.15749152034330596, "aux_brier/mean_r": 0.7004585867055033, "aux_brier/n_active_tok": 245.25, "aux_brier/n_groups": 13.5625, "aux_brier/n_step_records": 61.3125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5909985076651743, "calib/avg_num_step_conf": 7.77734375, "calib/ece": 0.15122134387351782, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.1857707509881423, "calib/gap": 0.07026868810202158, "calib/mean_conf": 0.6886205533596839, "calib/mu_c": 0.713895061728395, "calib/mu_w": 0.6436263736263734, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09976284584980238, "calib/std_conf": 0.2203900564201202, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2957.0, "completions/max_terminated_length": 2957.0, "completions/mean_length": 551.46875, "completions/mean_terminated_length": 553.6314086914062, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.1504, "grad_norm": 0.05325297638773918, "learning_rate": 1.638888888888889e-06, "loss": 0.025, "num_tokens": 30664780.0, "reward": 1.3164465427398682, "reward_std": 0.26625582575798035, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.742348313331604, "rewards/format_reward_step": 0.98828125, "step": 141 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.729933583691448e-08, "aux_brier/mean_group_std": 0.13612495295185428, "aux_brier/mean_r": 0.6964062674338573, "aux_brier/n_active_tok": 234.0, "aux_brier/n_groups": 12.09375, "aux_brier/n_step_records": 58.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48623853211009177, "calib/avg_num_step_conf": 7.4921875, "calib/ece": 0.2435545454545455, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2134387351778656, "calib/gap": -0.011872687308868701, "calib/mean_conf": 0.6929671936758893, "calib/mu_c": 0.6878520833333333, "calib/mu_w": 0.699724770642202, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18367588932806328, "calib/std_conf": 0.22810892115804543, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2665.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 532.546875, "completions/mean_terminated_length": 534.6353149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 147.0, "epoch": 0.15146666666666667, "grad_norm": 0.04511009156703949, "learning_rate": 1.6111111111111113e-06, "loss": 0.0316, "num_tokens": 30906272.0, "reward": 1.2223587036132812, "reward_std": 0.2563696503639221, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6706851124763489, "rewards/format_reward_step": 0.984375, "step": 142 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.395817313791351e-08, "aux_brier/mean_group_std": 0.14865711191630093, "aux_brier/mean_r": 0.724915488451146, "aux_brier/n_active_tok": 253.875, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 63.46875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.6018359529784705, "calib/avg_num_step_conf": 8.30859375, "calib/ece": 0.1801153846153846, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.15789473684210525, "calib/gap": 0.094835490688152, "calib/mean_conf": 0.6800465587044534, "calib/mu_c": 0.7234328358208955, "calib/mu_w": 0.6285973451327435, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15882591093117407, "calib/std_conf": 0.24248286983675946, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2457.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 523.28125, "completions/mean_terminated_length": 533.7052001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.15253333333333333, "grad_norm": 0.19991473853588104, "learning_rate": 1.5833333333333333e-06, "loss": 0.0282, "num_tokens": 31147568.0, "reward": 1.1798129081726074, "reward_std": 0.31810373067855835, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6958143711090088, "rewards/format_reward_step": 0.96484375, "step": 143 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.2023540246763886e-08, "aux_brier/mean_group_std": 0.14258853771469546, "aux_brier/mean_r": 0.6502506737865968, "aux_brier/n_active_tok": 242.375, "aux_brier/n_groups": 12.40625, "aux_brier/n_step_records": 60.59375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5428190032858707, "calib/avg_num_step_conf": 7.76953125, "calib/ece": 0.18601496062992132, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.23622047244094488, "calib/gap": 0.04648246166484127, "calib/mean_conf": 0.7169771653543308, "calib/mu_c": 0.7330813253012048, "calib/mu_w": 0.6865988636363636, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.12472440944881893, "calib/std_conf": 0.23648723399114735, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 554.41015625, "completions/mean_terminated_length": 556.5843505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 134.0, "epoch": 0.1536, "grad_norm": 0.18849033117294312, "learning_rate": 1.5555555555555558e-06, "loss": 0.0054, "num_tokens": 31393625.0, "reward": 1.3217639923095703, "reward_std": 0.30717578530311584, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7245556712150574, "rewards/format_reward_step": 0.984375, "step": 144 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.754544111196246e-08, "aux_brier/mean_group_std": 0.1365223424905456, "aux_brier/mean_r": 0.6510119951500244, "aux_brier/n_active_tok": 245.75, "aux_brier/n_groups": 13.78125, "aux_brier/n_step_records": 61.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4386400116737196, "calib/avg_num_step_conf": 7.84765625, "calib/ece": 0.2201411764705883, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.25098039215686274, "calib/gap": -0.04691536553334308, "calib/mean_conf": 0.7302901960784314, "calib/mu_c": 0.716123595505618, "calib/mu_w": 0.7630389610389611, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1261960784313726, "calib/std_conf": 0.22255499012462812, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2860.0, "completions/max_terminated_length": 2860.0, "completions/mean_length": 536.78515625, "completions/mean_terminated_length": 536.78515625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.15466666666666667, "grad_norm": 0.08550148457288742, "learning_rate": 1.527777777777778e-06, "loss": 0.0466, "num_tokens": 31633746.0, "reward": 1.369643211364746, "reward_std": 0.3430304527282715, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.7129477262496948, "rewards/format_reward_step": 0.9921875, "step": 145 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.0325533861841336e-08, "aux_brier/mean_group_std": 0.13788914031681604, "aux_brier/mean_r": 0.6849627388216702, "aux_brier/n_active_tok": 254.375, "aux_brier/n_groups": 13.0625, "aux_brier/n_step_records": 63.59375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6018697614442294, "calib/avg_num_step_conf": 8.3359375, "calib/ece": 0.30524103585657375, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.24701195219123506, "calib/gap": 0.07047804642166355, "calib/mean_conf": 0.7066314741035856, "calib/mu_c": 0.7462227272727274, "calib/mu_w": 0.6757446808510639, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28681274900398407, "calib/std_conf": 0.22728233456868213, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2664.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 539.65625, "completions/mean_terminated_length": 548.2222290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.15573333333333333, "grad_norm": 0.1618785560131073, "learning_rate": 1.5e-06, "loss": 0.0133, "num_tokens": 31879114.0, "reward": 1.0828824043273926, "reward_std": 0.325797438621521, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6518425345420837, "rewards/format_reward_step": 0.98046875, "step": 146 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1245945789883915e-07, "aux_brier/mean_group_std": 0.1140685566791095, "aux_brier/mean_r": 0.6704382420288377, "aux_brier/n_active_tok": 235.375, "aux_brier/n_groups": 12.3125, "aux_brier/n_step_records": 58.84375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5874392782789729, "calib/avg_num_step_conf": 7.71875, "calib/ece": 0.31639285714285725, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.23809523809523808, "calib/gap": 0.042561289508548206, "calib/mean_conf": 0.7385277777777779, "calib/mu_c": 0.7606528925619833, "calib/mu_w": 0.7180916030534351, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.28738095238095246, "calib/std_conf": 0.22165455843725412, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2562.0, "completions/max_terminated_length": 2562.0, "completions/mean_length": 536.90234375, "completions/mean_terminated_length": 543.268798828125, "completions/min_length": 0.0, "completions/min_terminated_length": 204.0, "epoch": 0.1568, "grad_norm": 0.04802270233631134, "learning_rate": 1.4722222222222225e-06, "loss": -0.0052, "num_tokens": 32120241.0, "reward": 1.1212406158447266, "reward_std": 0.29729151725769043, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6412124633789062, "rewards/format_reward_step": 0.9765625, "step": 147 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.5773953591111685e-08, "aux_brier/mean_group_std": 0.16056587768223612, "aux_brier/mean_r": 0.6798656600175332, "aux_brier/n_active_tok": 240.625, "aux_brier/n_groups": 12.5, "aux_brier/n_step_records": 60.15625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5228978978978979, "calib/avg_num_step_conf": 7.5859375, "calib/ece": 0.20421052631578954, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.242914979757085, "calib/gap": 0.012725225225225234, "calib/mean_conf": 0.7442914979757086, "calib/mu_c": 0.7493918918918918, "calib/mu_w": 0.7366666666666666, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1746558704453442, "calib/std_conf": 0.19479305963396576, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 524.8984375, "completions/mean_terminated_length": 531.12255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.15786666666666666, "grad_norm": 0.0756717175245285, "learning_rate": 1.4444444444444445e-06, "loss": 0.01, "num_tokens": 32359727.0, "reward": 1.2310714721679688, "reward_std": 0.30463019013404846, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6820984482765198, "rewards/format_reward_step": 0.96484375, "step": 148 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.702378270966424e-08, "aux_brier/mean_group_std": 0.1336615051271146, "aux_brier/mean_r": 0.6500482644182775, "aux_brier/n_active_tok": 255.125, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 63.78125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4933850364963504, "calib/avg_num_step_conf": 8.546875, "calib/ece": 0.2459437751004016, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.26907630522088355, "calib/gap": 0.0168338764337852, "calib/mean_conf": 0.7530923694779116, "calib/mu_c": 0.7606642335766424, "calib/mu_w": 0.7438303571428572, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22441767068273094, "calib/std_conf": 0.20457774001304102, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2702.0, "completions/max_terminated_length": 2702.0, "completions/mean_length": 603.33984375, "completions/mean_terminated_length": 608.090576171875, "completions/min_length": 0.0, "completions/min_terminated_length": 176.0, "epoch": 0.15893333333333334, "grad_norm": 0.061738647520542145, "learning_rate": 1.4166666666666667e-06, "loss": 0.0311, "num_tokens": 32618638.0, "reward": 1.186309576034546, "reward_std": 0.3024241328239441, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6593006253242493, "rewards/format_reward_step": 0.97265625, "step": 149 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.4711307985980646e-08, "aux_brier/mean_group_std": 0.1162967303826397, "aux_brier/mean_r": 0.6725437964829573, "aux_brier/n_active_tok": 244.0, "aux_brier/n_groups": 12.59375, "aux_brier/n_step_records": 61.0, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5235294117647059, "calib/avg_num_step_conf": 7.98828125, "calib/ece": 0.23097165991902835, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.2388663967611336, "calib/gap": 0.016306964164976456, "calib/mean_conf": 0.7338866396761133, "calib/mu_c": 0.7406206896551725, "calib/mu_w": 0.724313725490196, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18890688259109312, "calib/std_conf": 0.21135066727678872, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2970.0, "completions/max_terminated_length": 2970.0, "completions/mean_length": 524.33984375, "completions/mean_terminated_length": 528.468505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.16, "grad_norm": 0.1214539110660553, "learning_rate": 1.3888888888888892e-06, "loss": -0.0029, "num_tokens": 32857829.0, "reward": 1.2152711153030396, "reward_std": 0.31213098764419556, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6735844016075134, "rewards/format_reward_step": 0.9609375, "step": 150 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.534670737712851e-09, "aux_brier/mean_group_std": 0.14812076655019005, "aux_brier/mean_r": 0.62853573985209, "aux_brier/n_active_tok": 258.125, "aux_brier/n_groups": 15.59375, "aux_brier/n_step_records": 64.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5295977738426512, "calib/avg_num_step_conf": 8.265625, "calib/ece": 0.3240964285714286, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.24603174603174602, "calib/gap": 0.017215494561092837, "calib/mean_conf": 0.725506746031746, "calib/mu_c": 0.7346610169491525, "calib/mu_w": 0.7174455223880597, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.29067460317460325, "calib/std_conf": 0.22331088887336348, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3043.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 582.33984375, "completions/mean_terminated_length": 582.33984375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.16106666666666666, "grad_norm": 0.033875975757837296, "learning_rate": 1.3611111111111112e-06, "loss": 0.0346, "num_tokens": 33113932.0, "reward": 1.1087212562561035, "reward_std": 0.31823089718818665, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6301976442337036, "rewards/format_reward_step": 0.98046875, "step": 151 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.171292599692734e-08, "aux_brier/mean_group_std": 0.12296069096741848, "aux_brier/mean_r": 0.6774161609992941, "aux_brier/n_active_tok": 251.625, "aux_brier/n_groups": 13.25, "aux_brier/n_step_records": 62.90625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5619179710719239, "calib/avg_num_step_conf": 8.06640625, "calib/ece": 0.3388000000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2, "calib/gap": 0.0504154283072451, "calib/mean_conf": 0.74288, "calib/mu_c": 0.7725242718446601, "calib/mu_w": 0.722108843537415, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33484000000000014, "calib/std_conf": 0.19081222602338666, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2901.0, "completions/max_terminated_length": 2901.0, "completions/mean_length": 585.2734375, "completions/mean_terminated_length": 587.5686645507812, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.16213333333333332, "grad_norm": 0.06952095031738281, "learning_rate": 1.3333333333333334e-06, "loss": 0.0106, "num_tokens": 33369154.0, "reward": 1.0459668636322021, "reward_std": 0.34268027544021606, "rewards/accuracy_reward_step": 0.40234375, "rewards/final_brier_reward_step": 0.6213672161102295, "rewards/format_reward_step": 0.9765625, "step": 152 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.4909133649876338e-08, "aux_brier/mean_group_std": 0.15427189803094768, "aux_brier/mean_r": 0.6736047364927136, "aux_brier/n_active_tok": 266.25, "aux_brier/n_groups": 14.6875, "aux_brier/n_step_records": 66.5625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47341972187104925, "calib/avg_num_step_conf": 8.5625, "calib/ece": 0.2974861660079052, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2845849802371542, "calib/gap": -0.017457142857142727, "calib/mean_conf": 0.7703399209486167, "calib/mu_c": 0.7625428571428572, "calib/mu_w": 0.7799999999999999, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2572332015810277, "calib/std_conf": 0.19781900505777772, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1866.0, "completions/max_terminated_length": 1866.0, "completions/mean_length": 567.74609375, "completions/mean_terminated_length": 572.216552734375, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.1632, "grad_norm": 0.12804493308067322, "learning_rate": 1.3055555555555556e-06, "loss": 0.013, "num_tokens": 33621817.0, "reward": 1.2012845277786255, "reward_std": 0.3204795718193054, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6488880515098572, "rewards/format_reward_step": 0.984375, "step": 153 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.536497596492218e-08, "aux_brier/mean_group_std": 0.14129734121017762, "aux_brier/mean_r": 0.6855632723165584, "aux_brier/n_active_tok": 244.375, "aux_brier/n_groups": 13.0, "aux_brier/n_step_records": 61.09375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5750062924742009, "calib/avg_num_step_conf": 7.88671875, "calib/ece": 0.23221343873517797, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2134387351778656, "calib/gap": 0.04402529574628744, "calib/mean_conf": 0.7399604743083004, "calib/mu_c": 0.7601459854014598, "calib/mu_w": 0.7161206896551724, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21533596837944674, "calib/std_conf": 0.18521325818334297, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2258.0, "completions/max_terminated_length": 2258.0, "completions/mean_length": 532.609375, "completions/mean_terminated_length": 536.8031616210938, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.16426666666666667, "grad_norm": 0.14508311450481415, "learning_rate": 1.2777777777777779e-06, "loss": -0.0078, "num_tokens": 33862605.0, "reward": 1.199329137802124, "reward_std": 0.27426254749298096, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6879417896270752, "rewards/format_reward_step": 0.984375, "step": 154 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.1333737726104687e-08, "aux_brier/mean_group_std": 0.14273178240042994, "aux_brier/mean_r": 0.7172398380853386, "aux_brier/n_active_tok": 230.5, "aux_brier/n_groups": 11.40625, "aux_brier/n_step_records": 57.625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5188839848675915, "calib/avg_num_step_conf": 7.23046875, "calib/ece": 0.2647174603174603, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.15476190476190477, "calib/gap": 0.007943001261034088, "calib/mean_conf": 0.6810761904761905, "calib/mu_c": 0.6851737704918033, "calib/mu_w": 0.6772307692307692, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23083333333333333, "calib/std_conf": 0.2154397604563455, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1341.0, "completions/max_terminated_length": 1341.0, "completions/mean_length": 507.97265625, "completions/mean_terminated_length": 509.9647216796875, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.16533333333333333, "grad_norm": 0.024876870214939117, "learning_rate": 1.25e-06, "loss": -0.0005, "num_tokens": 34099862.0, "reward": 1.1372969150543213, "reward_std": 0.3188687562942505, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6585627794265747, "rewards/format_reward_step": 0.984375, "step": 155 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.299241327024081e-07, "aux_brier/mean_group_std": 0.1533016898820759, "aux_brier/mean_r": 0.6700811224225482, "aux_brier/n_active_tok": 272.125, "aux_brier/n_groups": 15.125, "aux_brier/n_step_records": 68.03125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6055640243902439, "calib/avg_num_step_conf": 8.54296875, "calib/ece": 0.26350597609561754, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.26693227091633465, "calib/gap": 0.09890561483739835, "calib/mean_conf": 0.7535458167330678, "calib/mu_c": 0.8039837398373983, "calib/mu_w": 0.705078125, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26350597609561754, "calib/std_conf": 0.20812140403418777, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2407.0, "completions/max_terminated_length": 2407.0, "completions/mean_length": 570.92578125, "completions/mean_terminated_length": 573.1647338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.1664, "grad_norm": 0.16774854063987732, "learning_rate": 1.2222222222222223e-06, "loss": 0.0066, "num_tokens": 34350779.0, "reward": 1.1346614360809326, "reward_std": 0.3221951127052307, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6714582443237305, "rewards/format_reward_step": 0.97265625, "step": 156 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.0921771084748677e-08, "aux_brier/mean_group_std": 0.1389349366910422, "aux_brier/mean_r": 0.6532888171456129, "aux_brier/n_active_tok": 258.625, "aux_brier/n_groups": 14.3125, "aux_brier/n_step_records": 64.65625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5527375201288245, "calib/avg_num_step_conf": 8.1484375, "calib/ece": 0.1586220472440945, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.2440944881889764, "calib/gap": 0.04140767579173388, "calib/mean_conf": 0.7758661417322834, "calib/mu_c": 0.7908641975308642, "calib/mu_w": 0.7494565217391304, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1483464566929134, "calib/std_conf": 0.17248712663157706, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2619.0, "completions/max_terminated_length": 2619.0, "completions/mean_length": 548.66015625, "completions/mean_terminated_length": 548.66015625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.16746666666666668, "grad_norm": 0.07889708131551743, "learning_rate": 1.1944444444444446e-06, "loss": -0.0004, "num_tokens": 34594964.0, "reward": 1.3122881650924683, "reward_std": 0.2834504544734955, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.733527660369873, "rewards/format_reward_step": 0.9921875, "step": 157 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.804819772608049e-08, "aux_brier/mean_group_std": 0.1453246407757794, "aux_brier/mean_r": 0.6799631783701684, "aux_brier/n_active_tok": 248.625, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 62.15625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.495826655982906, "calib/avg_num_step_conf": 7.94140625, "calib/ece": 0.17734365079365078, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.21428571428571427, "calib/gap": 0.03819887820512813, "calib/mean_conf": 0.7511531746031747, "calib/mu_c": 0.7657051282051283, "calib/mu_w": 0.7275062500000001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15472460317460313, "calib/std_conf": 0.1938342980717538, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1952.0, "completions/max_terminated_length": 1952.0, "completions/mean_length": 543.328125, "completions/mean_terminated_length": 545.4588623046875, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.16853333333333334, "grad_norm": 0.0917496457695961, "learning_rate": 1.1666666666666668e-06, "loss": 0.0501, "num_tokens": 34839296.0, "reward": 1.2805132865905762, "reward_std": 0.32383427023887634, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.7158035039901733, "rewards/format_reward_step": 0.984375, "step": 158 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7531008700277795e-08, "aux_brier/mean_group_std": 0.1327325295708848, "aux_brier/mean_r": 0.6925944917355671, "aux_brier/n_active_tok": 243.75, "aux_brier/n_groups": 12.59375, "aux_brier/n_step_records": 60.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5316851008458034, "calib/avg_num_step_conf": 7.703125, "calib/ece": 0.22784462151394425, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2350597609561753, "calib/gap": 0.030060897852960178, "calib/mean_conf": 0.7161394422310757, "calib/mu_c": 0.7288344827586206, "calib/mu_w": 0.6987735849056604, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.18314741035856577, "calib/std_conf": 0.21142789187860414, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2734.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 529.88671875, "completions/mean_terminated_length": 529.88671875, "completions/min_length": 216.0, "completions/min_terminated_length": 216.0, "epoch": 0.1696, "grad_norm": 0.06974631547927856, "learning_rate": 1.138888888888889e-06, "loss": 0.0219, "num_tokens": 35079731.0, "reward": 1.2298974990844727, "reward_std": 0.2957807183265686, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.693027675151825, "rewards/format_reward_step": 0.98046875, "step": 159 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.129838247433291e-08, "aux_brier/mean_group_std": 0.1394604946798674, "aux_brier/mean_r": 0.6705553631428137, "aux_brier/n_active_tok": 253.875, "aux_brier/n_groups": 14.71875, "aux_brier/n_step_records": 63.46875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5730019493177387, "calib/avg_num_step_conf": 7.96484375, "calib/ece": 0.211566265060241, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.20883534136546184, "calib/gap": 0.06342884990253406, "calib/mean_conf": 0.7371084337349396, "calib/mu_c": 0.7661481481481481, "calib/mu_w": 0.7027192982456141, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2032530120481928, "calib/std_conf": 0.19884524135415815, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3013.0, "completions/max_terminated_length": 3013.0, "completions/mean_length": 562.77734375, "completions/mean_terminated_length": 564.984375, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.17066666666666666, "grad_norm": 0.03897664323449135, "learning_rate": 1.111111111111111e-06, "loss": 0.0325, "num_tokens": 35328642.0, "reward": 1.185279130935669, "reward_std": 0.36624908447265625, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6864289045333862, "rewards/format_reward_step": 0.97265625, "step": 160 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.07990402415426e-09, "aux_brier/mean_group_std": 0.12839912022893005, "aux_brier/mean_r": 0.6950615155278358, "aux_brier/n_active_tok": 234.875, "aux_brier/n_groups": 11.625, "aux_brier/n_step_records": 58.71875, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.5360431883137503, "calib/avg_num_step_conf": 7.5234375, "calib/ece": 0.12250980392156871, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.17254901960784313, "calib/gap": 0.018561448078754816, "calib/mean_conf": 0.7298039215686275, "calib/mu_c": 0.7346808510638296, "calib/mu_w": 0.7161194029850748, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.05752941176470591, "calib/std_conf": 0.18535065015810723, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1608.0, "completions/max_terminated_length": 1608.0, "completions/mean_length": 495.1484375, "completions/mean_terminated_length": 497.0902404785156, "completions/min_length": 0.0, "completions/min_terminated_length": 159.0, "epoch": 0.17173333333333332, "grad_norm": 0.09245692193508148, "learning_rate": 1.0833333333333335e-06, "loss": -0.0038, "num_tokens": 35559320.0, "reward": 1.4303349256515503, "reward_std": 0.2664535343647003, "rewards/accuracy_reward_step": 0.73828125, "rewards/final_brier_reward_step": 0.7760273218154907, "rewards/format_reward_step": 0.99609375, "step": 161 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.0031950265231302e-08, "aux_brier/mean_group_std": 0.13714218286645607, "aux_brier/mean_r": 0.6734039229840812, "aux_brier/n_active_tok": 243.25, "aux_brier/n_groups": 12.46875, "aux_brier/n_step_records": 60.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4374364560639071, "calib/avg_num_step_conf": 7.640625, "calib/ece": 0.21780677290836659, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.24701195219123506, "calib/gap": -0.03524760348583866, "calib/mean_conf": 0.7402011952191235, "calib/mu_c": 0.7288264705882354, "calib/mu_w": 0.764074074074074, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14035856573705185, "calib/std_conf": 0.20451244217053277, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1652.0, "completions/max_terminated_length": 1652.0, "completions/mean_length": 522.94140625, "completions/mean_terminated_length": 524.9921875, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1728, "grad_norm": 0.12568475306034088, "learning_rate": 1.0555555555555557e-06, "loss": -0.002, "num_tokens": 35797337.0, "reward": 1.3347465991973877, "reward_std": 0.3291304111480713, "rewards/accuracy_reward_step": 0.66796875, "rewards/final_brier_reward_step": 0.7061737775802612, "rewards/format_reward_step": 0.98046875, "step": 162 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -3.1970422142979515e-09, "aux_brier/mean_group_std": 0.13039504097536303, "aux_brier/mean_r": 0.6381193296439778, "aux_brier/n_active_tok": 264.25, "aux_brier/n_groups": 15.53125, "aux_brier/n_step_records": 66.0625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47998993204127866, "calib/avg_num_step_conf": 8.53125, "calib/ece": 0.2588877470355731, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.25691699604743085, "calib/gap": 0.006769739491568227, "calib/mean_conf": 0.7718632411067193, "calib/mu_c": 0.7749671532846716, "calib/mu_w": 0.7681974137931034, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24462450592885374, "calib/std_conf": 0.19936554235580253, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2942.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 576.8671875, "completions/mean_terminated_length": 579.1294555664062, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.17386666666666667, "grad_norm": 0.04821145907044411, "learning_rate": 1.0277777777777777e-06, "loss": 0.0362, "num_tokens": 36049847.0, "reward": 1.1968306303024292, "reward_std": 0.2756430506706238, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6545100212097168, "rewards/format_reward_step": 0.98828125, "step": 163 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.3650690605541156e-08, "aux_brier/mean_group_std": 0.16076933863241694, "aux_brier/mean_r": 0.660551060316722, "aux_brier/n_active_tok": 283.0, "aux_brier/n_groups": 15.59375, "aux_brier/n_step_records": 70.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5222257653061224, "calib/avg_num_step_conf": 9.046875, "calib/ece": 0.2796230158730159, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.28174603174603174, "calib/gap": 0.017241071428571342, "calib/mean_conf": 0.7449801587301588, "calib/mu_c": 0.7526428571428572, "calib/mu_w": 0.7354017857142858, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23452380952380958, "calib/std_conf": 0.23564480013050623, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2475.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 615.796875, "completions/mean_terminated_length": 620.6456909179688, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.17493333333333333, "grad_norm": 0.08028343319892883, "learning_rate": 1.0000000000000002e-06, "loss": -0.022, "num_tokens": 36313627.0, "reward": 1.2039921283721924, "reward_std": 0.3271603584289551, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6597186326980591, "rewards/format_reward_step": 0.984375, "step": 164 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.8861954007308732e-08, "aux_brier/mean_group_std": 0.13630591963461353, "aux_brier/mean_r": 0.6881752091403782, "aux_brier/n_active_tok": 254.375, "aux_brier/n_groups": 13.96875, "aux_brier/n_step_records": 63.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5449660633484164, "calib/avg_num_step_conf": 8.14453125, "calib/ece": 0.30766798418972335, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.18972332015810275, "calib/gap": 0.028638134741076016, "calib/mean_conf": 0.7516996047430831, "calib/mu_c": 0.7670940170940173, "calib/mu_w": 0.7384558823529412, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29845849802371544, "calib/std_conf": 0.18471823612934662, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2411.0, "completions/max_terminated_length": 2411.0, "completions/mean_length": 561.1875, "completions/mean_terminated_length": 565.6063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.176, "grad_norm": 0.09488580375909805, "learning_rate": 9.722222222222224e-07, "loss": 0.0074, "num_tokens": 36562867.0, "reward": 1.1112394332885742, "reward_std": 0.23756884038448334, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.6402702927589417, "rewards/format_reward_step": 0.98828125, "step": 165 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.324432083258056e-08, "aux_brier/mean_group_std": 0.15903342189210876, "aux_brier/mean_r": 0.6470854466111328, "aux_brier/n_active_tok": 279.375, "aux_brier/n_groups": 14.875, "aux_brier/n_step_records": 69.84375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.49993279569892474, "calib/avg_num_step_conf": 9.0859375, "calib/ece": 0.24294302788844624, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3107569721115538, "calib/gap": -0.013543111559139676, "calib/mean_conf": 0.7495669322709163, "calib/mu_c": 0.7443870967741936, "calib/mu_w": 0.7579302083333332, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1874900398406375, "calib/std_conf": 0.22582235742657678, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3010.0, "completions/max_terminated_length": 3010.0, "completions/mean_length": 616.46875, "completions/mean_terminated_length": 621.3228149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 116.0, "epoch": 0.17706666666666668, "grad_norm": 0.027759229764342308, "learning_rate": 9.444444444444445e-07, "loss": 0.0017, "num_tokens": 36826867.0, "reward": 1.2645854949951172, "reward_std": 0.28986796736717224, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6755295991897583, "rewards/format_reward_step": 0.98046875, "step": 166 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.632106599869232e-08, "aux_brier/mean_group_std": 0.1350684005464948, "aux_brier/mean_r": 0.6363918002802433, "aux_brier/n_active_tok": 281.375, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 70.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.640604736878029, "calib/avg_num_step_conf": 8.96484375, "calib/ece": 0.17333333333333328, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3373015873015873, "calib/gap": 0.09453416149068339, "calib/mean_conf": 0.7918253968253969, "calib/mu_c": 0.8259627329192548, "calib/mu_w": 0.7314285714285714, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16313492063492058, "calib/std_conf": 0.18989094946398735, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2106.0, "completions/max_terminated_length": 2106.0, "completions/mean_length": 554.4765625, "completions/mean_terminated_length": 561.0513916015625, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.17813333333333334, "grad_norm": 0.017106177285313606, "learning_rate": 9.166666666666666e-07, "loss": -0.0502, "num_tokens": 37074421.0, "reward": 1.3065160512924194, "reward_std": 0.29185426235198975, "rewards/accuracy_reward_step": 0.62890625, "rewards/final_brier_reward_step": 0.7416890859603882, "rewards/format_reward_step": 0.984375, "step": 167 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.53070181546744e-08, "aux_brier/mean_group_std": 0.13934467885736762, "aux_brier/mean_r": 0.6410674411595916, "aux_brier/n_active_tok": 275.25, "aux_brier/n_groups": 14.9375, "aux_brier/n_step_records": 68.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4796153846153846, "calib/avg_num_step_conf": 8.640625, "calib/ece": 0.27373622047244095, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3188976377952756, "calib/gap": -0.009408717948717782, "calib/mean_conf": 0.7840590551181102, "calib/mu_c": 0.7802066666666667, "calib/mu_w": 0.7896153846153845, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2336220472440945, "calib/std_conf": 0.2077644042987041, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 616.578125, "completions/mean_terminated_length": 616.578125, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 0.1792, "grad_norm": 0.029959965497255325, "learning_rate": 8.88888888888889e-07, "loss": 0.0476, "num_tokens": 37336937.0, "reward": 1.2478678226470947, "reward_std": 0.3759251534938812, "rewards/accuracy_reward_step": 0.58984375, "rewards/final_brier_reward_step": 0.6633463501930237, "rewards/format_reward_step": 0.984375, "step": 168 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.2700272189757342e-08, "aux_brier/mean_group_std": 0.12465513351058224, "aux_brier/mean_r": 0.65873699609731, "aux_brier/n_active_tok": 250.125, "aux_brier/n_groups": 12.40625, "aux_brier/n_step_records": 62.53125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5946673848639914, "calib/avg_num_step_conf": 7.9453125, "calib/ece": 0.18968253968253987, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2777777777777778, "calib/gap": 0.05581201185025575, "calib/mean_conf": 0.7712698412698413, "calib/mu_c": 0.7920886075949367, "calib/mu_w": 0.736276595744681, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16698412698412718, "calib/std_conf": 0.2024100196990702, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2575.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 556.6328125, "completions/mean_terminated_length": 561.0157470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.18026666666666666, "grad_norm": 0.07708414644002914, "learning_rate": 8.611111111111112e-07, "loss": 0.0144, "num_tokens": 37583619.0, "reward": 1.2862026691436768, "reward_std": 0.2588883340358734, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7151237726211548, "rewards/format_reward_step": 0.98046875, "step": 169 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -4.777926432275592e-08, "aux_brier/mean_group_std": 0.14195865808078692, "aux_brier/mean_r": 0.6181783078042974, "aux_brier/n_active_tok": 274.5, "aux_brier/n_groups": 12.96875, "aux_brier/n_step_records": 68.625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6069466248037677, "calib/avg_num_step_conf": 8.91015625, "calib/ece": 0.24988047808764943, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.398406374501992, "calib/gap": 0.06307953950811085, "calib/mean_conf": 0.8050199203187252, "calib/mu_c": 0.8311564625850341, "calib/mu_w": 0.7680769230769232, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23462151394422315, "calib/std_conf": 0.19069880822845905, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1983.0, "completions/max_terminated_length": 1983.0, "completions/mean_length": 580.921875, "completions/mean_terminated_length": 587.810302734375, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.18133333333333335, "grad_norm": 0.04785623401403427, "learning_rate": 8.333333333333333e-07, "loss": 0.0008, "num_tokens": 37836487.0, "reward": 1.236884593963623, "reward_std": 0.3103271722793579, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6897257566452026, "rewards/format_reward_step": 0.98046875, "step": 170 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -2.701985013509045e-08, "aux_brier/mean_group_std": 0.15296221750582936, "aux_brier/mean_r": 0.6612675712427831, "aux_brier/n_active_tok": 275.125, "aux_brier/n_groups": 15.3125, "aux_brier/n_step_records": 68.78125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5181779035433072, "calib/avg_num_step_conf": 8.6875, "calib/ece": 0.2866274509803922, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.27058823529411763, "calib/gap": 0.01777312992125979, "calib/mean_conf": 0.7639607843137255, "calib/mu_c": 0.7728125, "calib/mu_w": 0.7550393700787402, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27431372549019617, "calib/std_conf": 0.19062229986868795, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2669.0, "completions/max_terminated_length": 2669.0, "completions/mean_length": 566.7421875, "completions/mean_terminated_length": 566.7421875, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.1824, "grad_norm": 0.010317047126591206, "learning_rate": 8.055555555555557e-07, "loss": 0.0214, "num_tokens": 38088469.0, "reward": 1.1608855724334717, "reward_std": 0.2953178286552429, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6513550281524658, "rewards/format_reward_step": 0.99609375, "step": 171 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.7665469099511526e-08, "aux_brier/mean_group_std": 0.13741332457898198, "aux_brier/mean_r": 0.6330897068012902, "aux_brier/n_active_tok": 271.375, "aux_brier/n_groups": 15.03125, "aux_brier/n_step_records": 67.84375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.41749757651294833, "calib/avg_num_step_conf": 8.6953125, "calib/ece": 0.24549407114624516, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.31620553359683795, "calib/gap": -0.050337903337487666, "calib/mean_conf": 0.7799604743083003, "calib/mu_c": 0.7626506024096387, "calib/mu_w": 0.8129885057471263, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1846640316205535, "calib/std_conf": 0.19759322215519504, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2770.0, "completions/max_terminated_length": 2770.0, "completions/mean_length": 545.58203125, "completions/mean_terminated_length": 549.8779296875, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.18346666666666667, "grad_norm": 0.027040479704737663, "learning_rate": 7.777777777777779e-07, "loss": 0.0294, "num_tokens": 38331490.0, "reward": 1.314855933189392, "reward_std": 0.345420777797699, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6891113519668579, "rewards/format_reward_step": 0.98828125, "step": 172 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.2891789538671539e-08, "aux_brier/mean_group_std": 0.1285896110355286, "aux_brier/mean_r": 0.6227357412063919, "aux_brier/n_active_tok": 274.375, "aux_brier/n_groups": 13.78125, "aux_brier/n_step_records": 68.59375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5490783410138249, "calib/avg_num_step_conf": 8.76171875, "calib/ece": 0.22881422924901185, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3557312252964427, "calib/gap": 0.025912442396313518, "calib/mean_conf": 0.8094466403162056, "calib/mu_c": 0.819483870967742, "calib/mu_w": 0.7935714285714285, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2128063241106719, "calib/std_conf": 0.17382152640588622, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1786.0, "completions/max_terminated_length": 1786.0, "completions/mean_length": 570.08984375, "completions/mean_terminated_length": 576.849853515625, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.18453333333333333, "grad_norm": 0.12760783731937408, "learning_rate": 7.5e-07, "loss": -0.0135, "num_tokens": 38580593.0, "reward": 1.2740519046783447, "reward_std": 0.3085237145423889, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6977707147598267, "rewards/format_reward_step": 0.98828125, "step": 173 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 2.3490160833139484e-07, "aux_brier/mean_group_std": 0.15113823536728327, "aux_brier/mean_r": 0.6672366930708439, "aux_brier/n_active_tok": 283.25, "aux_brier/n_groups": 15.53125, "aux_brier/n_step_records": 70.8125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6073516329902148, "calib/avg_num_step_conf": 9.05078125, "calib/ece": 0.287609561752988, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.3187250996015936, "calib/gap": 0.08039903418477568, "calib/mean_conf": 0.7703187250996015, "calib/mu_c": 0.811639344262295, "calib/mu_w": 0.7312403100775193, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2859362549800797, "calib/std_conf": 0.21188373965796042, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2806.0, "completions/max_terminated_length": 2806.0, "completions/mean_length": 652.22265625, "completions/mean_terminated_length": 657.3582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.1856, "grad_norm": 0.09726864844560623, "learning_rate": 7.222222222222222e-07, "loss": -0.004, "num_tokens": 38851794.0, "reward": 1.127492904663086, "reward_std": 0.4194108247756958, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6505968570709229, "rewards/format_reward_step": 0.9765625, "step": 174 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.2535407080405125e-08, "aux_brier/mean_group_std": 0.14564231764602587, "aux_brier/mean_r": 0.6539708415512551, "aux_brier/n_active_tok": 267.25, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 66.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.48779402415766054, "calib/avg_num_step_conf": 8.39453125, "calib/ece": 0.340197628458498, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.25691699604743085, "calib/gap": 0.02293706293706299, "calib/mean_conf": 0.7461264822134388, "calib/mu_c": 0.7590909090909093, "calib/mu_w": 0.7361538461538463, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32577075098814223, "calib/std_conf": 0.22205653904331202, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2630.0, "completions/max_terminated_length": 2630.0, "completions/mean_length": 581.6484375, "completions/mean_terminated_length": 581.6484375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.18666666666666668, "grad_norm": 0.00905072595924139, "learning_rate": 6.944444444444446e-07, "loss": 0.0716, "num_tokens": 39106520.0, "reward": 1.0768344402313232, "reward_std": 0.34601762890815735, "rewards/accuracy_reward_step": 0.4296875, "rewards/final_brier_reward_step": 0.6120253801345825, "rewards/format_reward_step": 0.98828125, "step": 175 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.849028153481761e-08, "aux_brier/mean_group_std": 0.1604823328795227, "aux_brier/mean_r": 0.651032482507284, "aux_brier/n_active_tok": 276.0, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 69.0, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6513650793650794, "calib/avg_num_step_conf": 8.69140625, "calib/ece": 0.29167171314741036, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.32270916334661354, "calib/gap": 0.10686793650793658, "calib/mean_conf": 0.7821131474103585, "calib/mu_c": 0.83576, "calib/mu_w": 0.7288920634920634, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2878884462151394, "calib/std_conf": 0.19660630009616722, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2490.0, "completions/max_terminated_length": 2490.0, "completions/mean_length": 596.6328125, "completions/mean_terminated_length": 598.9725952148438, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.18773333333333334, "grad_norm": 0.18465131521224976, "learning_rate": 6.666666666666667e-07, "loss": -0.0166, "num_tokens": 39363322.0, "reward": 1.146192193031311, "reward_std": 0.307635098695755, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6707066297531128, "rewards/format_reward_step": 0.98046875, "step": 176 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 6.8077137361372e-08, "aux_brier/mean_group_std": 0.15441151606022732, "aux_brier/mean_r": 0.6470214692273081, "aux_brier/n_active_tok": 279.25, "aux_brier/n_groups": 16.71875, "aux_brier/n_step_records": 69.8125, "calib/answer_extract_rate": 0.9453125, "calib/auroc": 0.5139462809917354, "calib/avg_num_step_conf": 9.37890625, "calib/ece": 0.2693801652892562, "calib/final_conf_rate": 0.9453125, "calib/format_rate": 0.9375, "calib/frac_conf_gt_0.9": 0.2809917355371901, "calib/gap": 0.033409090909091055, "calib/mean_conf": 0.7673140495867768, "calib/mu_c": 0.7825, "calib/mu_w": 0.7490909090909089, "calib/nonempty_final_conf_rate": 0.9453125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.24561983471074378, "calib/std_conf": 0.19413672157042633, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 2800.0, "completions/max_terminated_length": 2800.0, "completions/mean_length": 585.5625, "completions/mean_terminated_length": 604.4515991210938, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.1888, "grad_norm": 0.1904604136943817, "learning_rate": 6.388888888888889e-07, "loss": 0.0073, "num_tokens": 39617058.0, "reward": 1.144178867340088, "reward_std": 0.2834002375602722, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6392156481742859, "rewards/format_reward_step": 0.9375, "step": 177 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.252148153288446e-07, "aux_brier/mean_group_std": 0.1472377586394715, "aux_brier/mean_r": 0.6761584433687705, "aux_brier/n_active_tok": 267.25, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 66.8125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5442357424799237, "calib/avg_num_step_conf": 8.66796875, "calib/ece": 0.17414541832669317, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2549800796812749, "calib/gap": 0.06753171362460875, "calib/mean_conf": 0.7711175298804781, "calib/mu_c": 0.7961392405063292, "calib/mu_w": 0.7286075268817205, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15789043824701193, "calib/std_conf": 0.19291150235408994, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2521.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 560.4375, "completions/mean_terminated_length": 567.0830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.18986666666666666, "grad_norm": 0.1626780480146408, "learning_rate": 6.111111111111112e-07, "loss": 0.0102, "num_tokens": 39866602.0, "reward": 1.2890516519546509, "reward_std": 0.34410905838012695, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7265191078186035, "rewards/format_reward_step": 0.98046875, "step": 178 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 7.221143408925323e-08, "aux_brier/mean_group_std": 0.13279017732055964, "aux_brier/mean_r": 0.6330172093311681, "aux_brier/n_active_tok": 282.5, "aux_brier/n_groups": 15.375, "aux_brier/n_step_records": 70.625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5046073717948718, "calib/avg_num_step_conf": 9.0, "calib/ece": 0.2558333333333333, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.36904761904761907, "calib/gap": 0.00684294871794866, "calib/mean_conf": 0.7986111111111112, "calib/mu_c": 0.8012179487179488, "calib/mu_w": 0.7943750000000002, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21769841269841267, "calib/std_conf": 0.19373740500679115, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 624.0078125, "completions/mean_terminated_length": 626.4549560546875, "completions/min_length": 0.0, "completions/min_terminated_length": 257.0, "epoch": 0.19093333333333334, "grad_norm": 0.022816849872469902, "learning_rate": 5.833333333333334e-07, "loss": 0.0357, "num_tokens": 40132612.0, "reward": 1.270725965499878, "reward_std": 0.37764665484428406, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6844663619995117, "rewards/format_reward_step": 0.98046875, "step": 179 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.7315029614967115e-08, "aux_brier/mean_group_std": 0.1667547028228673, "aux_brier/mean_r": 0.616798028802157, "aux_brier/n_active_tok": 293.125, "aux_brier/n_groups": 15.71875, "aux_brier/n_step_records": 73.28125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5077922077922078, "calib/avg_num_step_conf": 9.359375, "calib/ece": 0.2565654618473896, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.40562248995983935, "calib/gap": 0.004985933014353794, "calib/mean_conf": 0.801506827309237, "calib/mu_c": 0.8034090909090909, "calib/mu_w": 0.7984231578947371, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21979919678714868, "calib/std_conf": 0.20997746750786947, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2587.0, "completions/max_terminated_length": 2587.0, "completions/mean_length": 671.48828125, "completions/mean_terminated_length": 676.7755737304688, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.192, "grad_norm": 0.04850844293832779, "learning_rate": 5.555555555555555e-07, "loss": 0.0457, "num_tokens": 40408369.0, "reward": 1.259287714958191, "reward_std": 0.3425866961479187, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6699631214141846, "rewards/format_reward_step": 0.97265625, "step": 180 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.919119154372931e-08, "aux_brier/mean_group_std": 0.13798272408351808, "aux_brier/mean_r": 0.6595707949605677, "aux_brier/n_active_tok": 265.25, "aux_brier/n_groups": 14.90625, "aux_brier/n_step_records": 66.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.6024305555555555, "calib/avg_num_step_conf": 8.4921875, "calib/ece": 0.25972222222222213, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3134920634920635, "calib/gap": 0.07095454545454549, "calib/mean_conf": 0.7741666666666666, "calib/mu_c": 0.8079545454545455, "calib/mu_w": 0.737, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.25503968253968246, "calib/std_conf": 0.18753967834136046, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2363.0, "completions/max_terminated_length": 2363.0, "completions/mean_length": 544.82421875, "completions/mean_terminated_length": 549.1141967773438, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.19306666666666666, "grad_norm": 0.01183368917554617, "learning_rate": 5.277777777777779e-07, "loss": 0.0362, "num_tokens": 40654108.0, "reward": 1.1742548942565918, "reward_std": 0.4138270318508148, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6735824346542358, "rewards/format_reward_step": 0.98046875, "step": 181 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.9778341520115816e-08, "aux_brier/mean_group_std": 0.15558871270294192, "aux_brier/mean_r": 0.6306201047707726, "aux_brier/n_active_tok": 285.0, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 71.25, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6228234477974331, "calib/avg_num_step_conf": 9.359375, "calib/ece": 0.18213991935483878, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.3467741935483871, "calib/gap": 0.09777741935483875, "calib/mean_conf": 0.7948818548387097, "calib/mu_c": 0.8315483870967741, "calib/mu_w": 0.7337709677419354, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17601088709677426, "calib/std_conf": 0.1862349585650365, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2841.0, "completions/max_terminated_length": 2841.0, "completions/mean_length": 591.59765625, "completions/mean_terminated_length": 598.6126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 240.0, "epoch": 0.19413333333333332, "grad_norm": 0.020462146028876305, "learning_rate": 5.000000000000001e-07, "loss": 0.0252, "num_tokens": 40911717.0, "reward": 1.270979404449463, "reward_std": 0.31269222497940063, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.7245425581932068, "rewards/format_reward_step": 0.96875, "step": 182 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 5.1663264583057256e-08, "aux_brier/mean_group_std": 0.14147481098174966, "aux_brier/mean_r": 0.6280245158192589, "aux_brier/n_active_tok": 290.375, "aux_brier/n_groups": 16.71875, "aux_brier/n_step_records": 72.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5048015364916774, "calib/avg_num_step_conf": 9.23828125, "calib/ece": 0.25853968253968274, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.30952380952380953, "calib/gap": 0.02162458386683752, "calib/mean_conf": 0.7864761904761906, "calib/mu_c": 0.7959154929577466, "calib/mu_w": 0.774290909090909, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24076190476190493, "calib/std_conf": 0.18787507307756457, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2623.0, "completions/max_terminated_length": 2623.0, "completions/mean_length": 637.36328125, "completions/mean_terminated_length": 639.86279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.1952, "grad_norm": 0.17357149720191956, "learning_rate": 4.7222222222222226e-07, "loss": 0.0275, "num_tokens": 41181562.0, "reward": 1.2112045288085938, "reward_std": 0.38617032766342163, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6651306748390198, "rewards/format_reward_step": 0.98046875, "step": 183 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.02241734259934e-09, "aux_brier/mean_group_std": 0.150533896072864, "aux_brier/mean_r": 0.6475943549764245, "aux_brier/n_active_tok": 270.625, "aux_brier/n_groups": 13.8125, "aux_brier/n_step_records": 67.65625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5664073451079338, "calib/avg_num_step_conf": 8.5859375, "calib/ece": 0.19061752988047811, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3346613545816733, "calib/gap": 0.045447154471544904, "calib/mean_conf": 0.7980278884462152, "calib/mu_c": 0.8137804878048781, "calib/mu_w": 0.7683333333333332, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.16762948207171313, "calib/std_conf": 0.17826464060704933, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2426.0, "completions/max_terminated_length": 2426.0, "completions/mean_length": 562.30859375, "completions/mean_terminated_length": 566.7362060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.19626666666666667, "grad_norm": 0.1029668003320694, "learning_rate": 4.444444444444445e-07, "loss": -0.0193, "num_tokens": 41430793.0, "reward": 1.3204048871994019, "reward_std": 0.35812973976135254, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.7269323468208313, "rewards/format_reward_step": 0.98046875, "step": 184 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.3424797128125388e-08, "aux_brier/mean_group_std": 0.14131096015246147, "aux_brier/mean_r": 0.6084639370652464, "aux_brier/n_active_tok": 313.0, "aux_brier/n_groups": 21.65625, "aux_brier/n_step_records": 78.25, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5455365819797307, "calib/avg_num_step_conf": 10.0546875, "calib/ece": 0.23742653061224497, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.3551020408163265, "calib/gap": 0.021431972789115572, "calib/mean_conf": 0.8002061224489796, "calib/mu_c": 0.8087789115646259, "calib/mu_w": 0.7873469387755103, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21881632653061234, "calib/std_conf": 0.19429516616521544, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2968.0, "completions/max_terminated_length": 2968.0, "completions/mean_length": 634.73046875, "completions/mean_terminated_length": 642.2569580078125, "completions/min_length": 0.0, "completions/min_terminated_length": 219.0, "epoch": 0.19733333333333333, "grad_norm": 0.15899337828159332, "learning_rate": 4.1666666666666667e-07, "loss": 0.0483, "num_tokens": 41700204.0, "reward": 1.2162706851959229, "reward_std": 0.31484103202819824, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6619581580162048, "rewards/format_reward_step": 0.953125, "step": 185 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -5.7430997069118916e-08, "aux_brier/mean_group_std": 0.14876179498089898, "aux_brier/mean_r": 0.6432150913401712, "aux_brier/n_active_tok": 273.875, "aux_brier/n_groups": 15.875, "aux_brier/n_step_records": 68.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5766349661281919, "calib/avg_num_step_conf": 8.65234375, "calib/ece": 0.22332015810276679, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.34782608695652173, "calib/gap": 0.03175742574257445, "calib/mean_conf": 0.7848221343873518, "calib/mu_c": 0.7975000000000001, "calib/mu_w": 0.7657425742574256, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.20367588932806324, "calib/std_conf": 0.17764285567987392, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2887.0, "completions/max_terminated_length": 2887.0, "completions/mean_length": 582.37109375, "completions/mean_terminated_length": 584.6549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.1984, "grad_norm": 0.10439177602529526, "learning_rate": 3.8888888888888895e-07, "loss": 0.0205, "num_tokens": 41954331.0, "reward": 1.2582097053527832, "reward_std": 0.34368404746055603, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.6969015598297119, "rewards/format_reward_step": 0.98046875, "step": 186 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.863037581630095e-09, "aux_brier/mean_group_std": 0.15976059086781008, "aux_brier/mean_r": 0.6155475299853437, "aux_brier/n_active_tok": 306.375, "aux_brier/n_groups": 19.34375, "aux_brier/n_step_records": 76.59375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5428374474789915, "calib/avg_num_step_conf": 10.0234375, "calib/ece": 0.2904435483870968, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.32661290322580644, "calib/gap": 0.02077731092436963, "calib/mean_conf": 0.7790725806451613, "calib/mu_c": 0.7884558823529411, "calib/mu_w": 0.7676785714285714, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26056451612903225, "calib/std_conf": 0.21440922860897363, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 641.9921875, "completions/mean_terminated_length": 649.6047973632812, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.19946666666666665, "grad_norm": 0.1448356658220291, "learning_rate": 3.611111111111111e-07, "loss": 0.0393, "num_tokens": 42220225.0, "reward": 1.1763032674789429, "reward_std": 0.3570302724838257, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6427128911018372, "rewards/format_reward_step": 0.96875, "step": 187 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 3.228870604221612e-08, "aux_brier/mean_group_std": 0.1515548729685348, "aux_brier/mean_r": 0.62776678367857, "aux_brier/n_active_tok": 286.25, "aux_brier/n_groups": 15.6875, "aux_brier/n_step_records": 71.5625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.48477280610475204, "calib/avg_num_step_conf": 9.37890625, "calib/ece": 0.24790322580645163, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.375, "calib/gap": -0.011956989247311811, "calib/mean_conf": 0.8117741935483872, "calib/mu_c": 0.8072903225806451, "calib/mu_w": 0.819247311827957, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21733870967741936, "calib/std_conf": 0.175609759754758, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2076.0, "completions/max_terminated_length": 2076.0, "completions/mean_length": 594.5, "completions/mean_terminated_length": 608.7680053710938, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.20053333333333334, "grad_norm": 0.01984618976712227, "learning_rate": 3.3333333333333335e-07, "loss": -0.0647, "num_tokens": 42476489.0, "reward": 1.2597613334655762, "reward_std": 0.32933613657951355, "rewards/accuracy_reward_step": 0.609375, "rewards/final_brier_reward_step": 0.6718578338623047, "rewards/format_reward_step": 0.96484375, "step": 188 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.169564063460918e-08, "aux_brier/mean_group_std": 0.1490267539904431, "aux_brier/mean_r": 0.6470431485990853, "aux_brier/n_active_tok": 261.875, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 65.46875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5916586768935762, "calib/avg_num_step_conf": 8.21875, "calib/ece": 0.21291141732283467, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.30708661417322836, "calib/gap": 0.09119159475870897, "calib/mean_conf": 0.7819704724409449, "calib/mu_c": 0.8196677852348994, "calib/mu_w": 0.7284761904761904, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20413385826771657, "calib/std_conf": 0.2037642225222371, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2977.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 566.8671875, "completions/mean_terminated_length": 566.8671875, "completions/min_length": 205.0, "completions/min_terminated_length": 205.0, "epoch": 0.2016, "grad_norm": 0.02794613130390644, "learning_rate": 3.055555555555556e-07, "loss": 0.0251, "num_tokens": 42729375.0, "reward": 1.2524230480194092, "reward_std": 0.2862406373023987, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.7128171324729919, "rewards/format_reward_step": 0.984375, "step": 189 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.4173080308851365e-07, "aux_brier/mean_group_std": 0.1495940180538926, "aux_brier/mean_r": 0.6228908692663656, "aux_brier/n_active_tok": 295.625, "aux_brier/n_groups": 15.65625, "aux_brier/n_step_records": 73.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5240987589467463, "calib/avg_num_step_conf": 9.5546875, "calib/ece": 0.25219685039370077, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.3937007874015748, "calib/gap": 0.014606737146234283, "calib/mean_conf": 0.7954409448818898, "calib/mu_c": 0.8010191082802549, "calib/mu_w": 0.7864123711340206, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21476377952755904, "calib/std_conf": 0.20944964078342088, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1818.0, "completions/max_terminated_length": 1818.0, "completions/mean_length": 632.89453125, "completions/mean_terminated_length": 635.3765258789062, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.20266666666666666, "grad_norm": 0.016085034236311913, "learning_rate": 2.7777777777777776e-07, "loss": -0.0013, "num_tokens": 42997004.0, "reward": 1.2818992137908936, "reward_std": 0.2702663540840149, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.6900966763496399, "rewards/format_reward_step": 0.9921875, "step": 190 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -8.638660939364051e-09, "aux_brier/mean_group_std": 0.15581892159075297, "aux_brier/mean_r": 0.6131782077556764, "aux_brier/n_active_tok": 290.25, "aux_brier/n_groups": 16.3125, "aux_brier/n_step_records": 72.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5088487332339792, "calib/avg_num_step_conf": 9.19921875, "calib/ece": 0.34322834645669287, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4094488188976378, "calib/gap": -0.0022379533035271626, "calib/mean_conf": 0.8163779527559055, "calib/mu_c": 0.8153030303030303, "calib/mu_w": 0.8175409836065575, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.31996062992125984, "calib/std_conf": 0.18425918621033635, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2437.0, "completions/max_terminated_length": 2437.0, "completions/mean_length": 571.08203125, "completions/mean_terminated_length": 573.3215942382812, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.20373333333333332, "grad_norm": 0.01850549317896366, "learning_rate": 2.5000000000000004e-07, "loss": 0.0274, "num_tokens": 43247369.0, "reward": 1.1673165559768677, "reward_std": 0.3018760681152344, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6223914623260498, "rewards/format_reward_step": 0.9921875, "step": 191 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 1.208101690422514e-07, "aux_brier/mean_group_std": 0.139313720991278, "aux_brier/mean_r": 0.6547211840843482, "aux_brier/n_active_tok": 283.375, "aux_brier/n_groups": 16.6875, "aux_brier/n_step_records": 70.84375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5362255820176614, "calib/avg_num_step_conf": 9.25, "calib/ece": 0.19040160642570278, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.27710843373493976, "calib/gap": 0.04529636071715282, "calib/mean_conf": 0.7708835341365461, "calib/mu_c": 0.7892567567567568, "calib/mu_w": 0.743960396039604, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18345381526104415, "calib/std_conf": 0.19426729425311287, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2862.0, "completions/max_terminated_length": 2862.0, "completions/mean_length": 616.3046875, "completions/mean_terminated_length": 623.6126708984375, "completions/min_length": 0.0, "completions/min_terminated_length": 157.0, "epoch": 0.2048, "grad_norm": 0.044234082102775574, "learning_rate": 2.2222222222222224e-07, "loss": 0.0556, "num_tokens": 43510119.0, "reward": 1.2375504970550537, "reward_std": 0.3908289074897766, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6923894882202148, "rewards/format_reward_step": 0.97265625, "step": 192 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -7.288471756683457e-09, "aux_brier/mean_group_std": 0.1441714346823197, "aux_brier/mean_r": 0.6247277574411615, "aux_brier/n_active_tok": 297.5, "aux_brier/n_groups": 18.71875, "aux_brier/n_step_records": 74.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.43677727501256913, "calib/avg_num_step_conf": 9.71484375, "calib/ece": 0.2895652173913044, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.3201581027667984, "calib/gap": -0.02767408245349412, "calib/mean_conf": 0.7811067193675889, "calib/mu_c": 0.7683088235294119, "calib/mu_w": 0.795982905982906, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.26656126482213444, "calib/std_conf": 0.2006303319863496, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2571.0, "completions/max_terminated_length": 2571.0, "completions/mean_length": 616.9765625, "completions/mean_terminated_length": 621.8346557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 262.0, "epoch": 0.20586666666666667, "grad_norm": 0.036437131464481354, "learning_rate": 1.9444444444444447e-07, "loss": 0.0183, "num_tokens": 43773777.0, "reward": 1.177616000175476, "reward_std": 0.37303587794303894, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.6245265603065491, "rewards/format_reward_step": 0.98046875, "step": 193 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.5224946470215883e-08, "aux_brier/mean_group_std": 0.12996394249541401, "aux_brier/mean_r": 0.6584683010816339, "aux_brier/n_active_tok": 263.875, "aux_brier/n_groups": 14.84375, "aux_brier/n_step_records": 65.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5465963566634707, "calib/avg_num_step_conf": 8.3671875, "calib/ece": 0.2622834645669291, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.25984251968503935, "calib/gap": 0.015891339085969847, "calib/mean_conf": 0.7814173228346457, "calib/mu_c": 0.787986577181208, "calib/mu_w": 0.7720952380952382, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2285433070866141, "calib/std_conf": 0.19218890153478618, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2441.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 555.1484375, "completions/mean_terminated_length": 557.3255004882812, "completions/min_length": 0.0, "completions/min_terminated_length": 216.0, "epoch": 0.20693333333333333, "grad_norm": 0.07531143724918365, "learning_rate": 1.6666666666666668e-07, "loss": 0.028, "num_tokens": 44021839.0, "reward": 1.24697744846344, "reward_std": 0.26652994751930237, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6832222938537598, "rewards/format_reward_step": 0.98828125, "step": 194 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.7532560276675184e-08, "aux_brier/mean_group_std": 0.13878589888841797, "aux_brier/mean_r": 0.647171167700865, "aux_brier/n_active_tok": 273.125, "aux_brier/n_groups": 15.78125, "aux_brier/n_step_records": 68.28125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5282556065239551, "calib/avg_num_step_conf": 8.6796875, "calib/ece": 0.2525296442687747, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.33992094861660077, "calib/gap": 0.025380988786952274, "calib/mean_conf": 0.8005928853754941, "calib/mu_c": 0.8115277777777778, "calib/mu_w": 0.7861467889908256, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24197628458498027, "calib/std_conf": 0.1758076907848614, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 565.296875, "completions/mean_terminated_length": 567.5137329101562, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.208, "grad_norm": 0.019573550671339035, "learning_rate": 1.3888888888888888e-07, "loss": 0.0146, "num_tokens": 44272539.0, "reward": 1.2253321409225464, "reward_std": 0.3119302988052368, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6747660040855408, "rewards/format_reward_step": 0.98828125, "step": 195 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -1.1924304044175216e-08, "aux_brier/mean_group_std": 0.1442683319151691, "aux_brier/mean_r": 0.650264250368892, "aux_brier/n_active_tok": 244.125, "aux_brier/n_groups": 11.59375, "aux_brier/n_step_records": 61.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.520483870967742, "calib/avg_num_step_conf": 7.64453125, "calib/ece": 0.24245490196078437, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.3568627450980392, "calib/gap": -0.0019225806451612648, "calib/mean_conf": 0.8092313725490197, "calib/mu_c": 0.8084774193548387, "calib/mu_w": 0.8104, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22192156862745105, "calib/std_conf": 0.1618091995375382, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1337.0, "completions/max_terminated_length": 1337.0, "completions/mean_length": 492.14453125, "completions/mean_terminated_length": 494.07452392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.20906666666666668, "grad_norm": 0.035324521362781525, "learning_rate": 1.1111111111111112e-07, "loss": 0.0136, "num_tokens": 44501072.0, "reward": 1.2763314247131348, "reward_std": 0.22654297947883606, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6912633180618286, "rewards/format_reward_step": 0.99609375, "step": 196 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 8.983562782560739e-10, "aux_brier/mean_group_std": 0.13207066531558903, "aux_brier/mean_r": 0.6420445812513388, "aux_brier/n_active_tok": 268.25, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 67.0625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.48923577235772364, "calib/avg_num_step_conf": 8.6171875, "calib/ece": 0.3347217741935485, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.35080645161290325, "calib/gap": 0.0008433821138212894, "calib/mean_conf": 0.8077862903225806, "calib/mu_c": 0.8082113821138213, "calib/mu_w": 0.807368, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32327016129032266, "calib/std_conf": 0.17713087098867333, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2888.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 629.13671875, "completions/mean_terminated_length": 631.6039428710938, "completions/min_length": 0.0, "completions/min_terminated_length": 218.0, "epoch": 0.21013333333333334, "grad_norm": 0.01583663374185562, "learning_rate": 8.333333333333334e-08, "loss": 0.1097, "num_tokens": 44767187.0, "reward": 1.1133050918579102, "reward_std": 0.3405642807483673, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6016573905944824, "rewards/format_reward_step": 0.96484375, "step": 197 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.454066207861597e-08, "aux_brier/mean_group_std": 0.13516981325044658, "aux_brier/mean_r": 0.639746649733349, "aux_brier/n_active_tok": 268.625, "aux_brier/n_groups": 13.65625, "aux_brier/n_step_records": 67.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5288235294117648, "calib/avg_num_step_conf": 8.40625, "calib/ece": 0.24091269841269847, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.30952380952380953, "calib/gap": 0.00979999999999992, "calib/mean_conf": 0.7958333333333334, "calib/mu_c": 0.7998000000000001, "calib/mu_w": 0.7900000000000001, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2207539682539683, "calib/std_conf": 0.17411368526576365, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2590.0, "completions/max_terminated_length": 2590.0, "completions/mean_length": 553.3671875, "completions/mean_terminated_length": 557.7244262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.2112, "grad_norm": 0.008425973355770111, "learning_rate": 5.555555555555556e-08, "loss": 0.0243, "num_tokens": 45014233.0, "reward": 1.2487266063690186, "reward_std": 0.3016568422317505, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6824066042900085, "rewards/format_reward_step": 0.984375, "step": 198 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": 4.2256545124130795e-08, "aux_brier/mean_group_std": 0.14205363106055785, "aux_brier/mean_r": 0.6133012587203641, "aux_brier/n_active_tok": 294.125, "aux_brier/n_groups": 15.90625, "aux_brier/n_step_records": 73.53125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5032858351226495, "calib/avg_num_step_conf": 9.35546875, "calib/ece": 0.27801200000000004, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.408, "calib/gap": 0.00547966686186474, "calib/mean_conf": 0.8238520000000001, "calib/mu_c": 0.8262411347517731, "calib/mu_w": 0.8207614678899083, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26893200000000006, "calib/std_conf": 0.16464069392467948, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2810.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 637.6015625, "completions/mean_terminated_length": 642.6220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 181.0, "epoch": 0.21226666666666666, "grad_norm": 0.04094070941209793, "learning_rate": 2.777777777777778e-08, "loss": 0.0246, "num_tokens": 45281659.0, "reward": 1.2007229328155518, "reward_std": 0.36863645911216736, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6466419100761414, "rewards/format_reward_step": 0.9765625, "step": 199 }, { "aux_brier/lambda": 0.020000000000000007, "aux_brier/loss": -6.43038122749573e-10, "aux_brier/mean_group_std": 0.14059694998275904, "aux_brier/mean_r": 0.6255956283808735, "aux_brier/n_active_tok": 274.75, "aux_brier/n_groups": 14.8125, "aux_brier/n_step_records": 68.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.453313351143546, "calib/avg_num_step_conf": 8.640625, "calib/ece": 0.2655555555555556, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3333333333333333, "calib/gap": -0.02687430768228294, "calib/mean_conf": 0.8033333333333333, "calib/mu_c": 0.7923489932885907, "calib/mu_w": 0.8192233009708736, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23880952380952386, "calib/std_conf": 0.17190113437671084, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2469.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 590.28515625, "completions/mean_terminated_length": 594.9330444335938, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 0.21333333333333335, "grad_norm": 0.025699615478515625, "learning_rate": 0.0, "loss": 0.0482, "num_tokens": 45540820.0, "reward": 1.2393033504486084, "reward_std": 0.2776757776737213, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.660338282585144, "rewards/format_reward_step": 0.984375, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.027927971099852584, "train_runtime": 15555.4807, "train_samples_per_second": 3.291, "train_steps_per_second": 0.013 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 45540820, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }