{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21333333333333335, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "aux_brier/lambda": 0.05, "aux_brier/loss": 5.791089203391117e-07, "aux_brier/mean_group_std": 0.06289231620091193, "aux_brier/mean_r": 0.4665906001184907, "aux_brier/n_active_tok": 24.615384615384617, "aux_brier/n_groups": 5.3076923076923075, "aux_brier/n_step_records": 6.153846153846154, "calib/answer_extract_rate": 0.08203125, "calib/auroc": 0.6944444444444445, "calib/avg_num_step_conf": 0.3359375, "calib/ece": 0.6230769230769231, "calib/final_conf_rate": 0.05078125, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.03861111111111115, "calib/mean_conf": 0.9307692307692309, "calib/mu_c": 0.9575, "calib/mu_w": 0.9188888888888889, "calib/nonempty_final_conf_rate": 0.05078125, "calib/nonempty_reasoning_rate": 0.09765625, "calib/nonempty_step_conf_rate": 0.0703125, "calib/pce": 0.6230769230769231, "calib/std_conf": 0.07965903671384378, "calib/step_conf_rate": 0.0703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 613.67578125, "completions/mean_terminated_length": 674.2532348632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0010666666666666667, "grad_norm": 0.16193392872810364, "learning_rate": 2.5000000000000004e-07, "loss": 0.0318, "num_tokens": 264685.0, "reward": 0.04124843701720238, "reward_std": 0.0838509351015091, "rewards/accuracy_reward_step": 0.015625, "rewards/final_brier_reward_step": 0.01655624993145466, "rewards/format_reward_step": 0.04296875, "step": 1 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 2.461345396504181e-08, "aux_brier/mean_group_std": 0.046398653263787254, "aux_brier/mean_r": 0.430243897442093, "aux_brier/n_active_tok": 28.42105263157895, "aux_brier/n_groups": 5.894736842105263, "aux_brier/n_step_records": 7.105263157894737, "calib/answer_extract_rate": 0.13671875, "calib/auroc": 0.5338345864661654, "calib/avg_num_step_conf": 0.55078125, "calib/ece": 0.6261538461538463, "calib/final_conf_rate": 0.1015625, "calib/format_rate": 0.08984375, "calib/frac_conf_gt_0.9": 0.7692307692307693, "calib/gap": 0.002406015037593856, "calib/mean_conf": 0.8953846153846153, "calib/mu_c": 0.897142857142857, "calib/mu_w": 0.8947368421052632, "calib/nonempty_final_conf_rate": 0.1015625, "calib/nonempty_reasoning_rate": 0.14453125, "calib/nonempty_step_conf_rate": 0.109375, "calib/pce": 0.6261538461538463, "calib/std_conf": 0.18653172073466937, "calib/step_conf_rate": 0.109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0546875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 646.4609375, "completions/mean_terminated_length": 683.8594970703125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0021333333333333334, "grad_norm": 0.006251324899494648, "learning_rate": 5.000000000000001e-07, "loss": 0.0643, "num_tokens": 533467.0, "reward": 0.08358447253704071, "reward_std": 0.15892045199871063, "rewards/accuracy_reward_step": 0.03125, "rewards/final_brier_reward_step": 0.02965039201080799, "rewards/format_reward_step": 0.08984375, "step": 2 }, { "aux_brier/lambda": 0.05, "aux_brier/loss": 3.2406800544647144e-08, "aux_brier/mean_group_std": 0.0121952197767889, "aux_brier/mean_r": 0.4305657651894744, "aux_brier/n_active_tok": 16.571428571428573, "aux_brier/n_groups": 3.5, "aux_brier/n_step_records": 4.142857142857143, "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.234375, "calib/ece": 0.9137500000000001, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.875, "calib/mean_conf": 0.9137500000000001, "calib/mu_c": NaN, "calib/mu_w": 0.9137500000000001, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.08203125, "calib/nonempty_step_conf_rate": 0.06640625, "calib/pce": 0.9137500000000001, "calib/std_conf": 0.12267207302397722, "calib/step_conf_rate": 0.06640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.11328125, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 680.3984375, "completions/mean_terminated_length": 767.321533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0032, "grad_norm": 0.37062835693359375, "learning_rate": 7.5e-07, "loss": 0.0115, "num_tokens": 812905.0, "reward": 0.01679697260260582, "reward_std": 0.039274316281080246, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.004687890410423279, "rewards/format_reward_step": 0.03125, "step": 3 }, { "aux_brier/lambda": 0.049999999999999996, "aux_brier/loss": -3.602478569421562e-09, "aux_brier/mean_group_std": 0.009702237574707062, "aux_brier/mean_r": 0.3583118615472482, "aux_brier/n_active_tok": 21.714285714285715, "aux_brier/n_groups": 4.714285714285714, "aux_brier/n_step_records": 5.428571428571429, "calib/answer_extract_rate": 0.05859375, "calib/auroc": 0.4666666666666667, "calib/avg_num_step_conf": 0.16015625, "calib/ece": 0.6024999999999999, "calib/final_conf_rate": 0.03125, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 1.0, "calib/gap": -0.0013333333333334085, "calib/mean_conf": 0.9774999999999999, "calib/mu_c": 0.9766666666666666, "calib/mu_w": 0.978, "calib/nonempty_final_conf_rate": 0.03125, "calib/nonempty_reasoning_rate": 0.06640625, "calib/nonempty_step_conf_rate": 0.03125, "calib/pce": 0.6024999999999999, "calib/std_conf": 0.017139136501002624, "calib/step_conf_rate": 0.03125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08984375, "completions/max_length": 2833.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 699.56640625, "completions/mean_terminated_length": 768.622314453125, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.004266666666666667, "grad_norm": 0.0027342927642166615, "learning_rate": 1.0000000000000002e-06, "loss": 0.0228, "num_tokens": 1098162.0, "reward": 0.02265048772096634, "reward_std": 0.05369918793439865, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.004664453212171793, "rewards/format_reward_step": 0.01953125, "step": 4 }, { "aux_brier/lambda": 0.049999999999999996, "aux_brier/loss": 0.0, "aux_brier/mean_group_std": 0.0, "aux_brier/mean_r": 0.6531497392606985, "aux_brier/n_active_tok": 13.333333333333334, "aux_brier/n_groups": 3.3333333333333335, "aux_brier/n_step_records": 3.3333333333333335, "calib/answer_extract_rate": 0.046875, "calib/avg_num_step_conf": 0.078125, "calib/ece": 0.9357142857142857, "calib/final_conf_rate": 0.02734375, "calib/format_rate": 0.01953125, "calib/frac_conf_gt_0.9": 0.7142857142857143, "calib/mean_conf": 0.9357142857142857, "calib/mu_c": NaN, "calib/mu_w": 0.9357142857142857, "calib/nonempty_final_conf_rate": 0.02734375, "calib/nonempty_reasoning_rate": 0.046875, "calib/nonempty_step_conf_rate": 0.0234375, "calib/pce": 0.9357142857142857, "calib/std_conf": 0.046246897303538974, "calib/step_conf_rate": 0.0234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2945.0, "completions/max_terminated_length": 2945.0, "completions/mean_length": 652.7890625, "completions/mean_terminated_length": 723.437255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.005333333333333333, "grad_norm": 0.0021710810251533985, "learning_rate": 1.25e-06, "loss": 0.0131, "num_tokens": 1371964.0, "reward": 0.01028769463300705, "reward_std": 0.02496844157576561, "rewards/accuracy_reward_step": 0.0, "rewards/final_brier_reward_step": 0.002088281325995922, "rewards/format_reward_step": 0.01953125, "step": 5 }, { "aux_brier/lambda": 0.05, "aux_brier/loss": -1.7029898761019728e-09, "aux_brier/mean_group_std": 0.006566845763957197, "aux_brier/mean_r": 0.48239234059468156, "aux_brier/n_active_tok": 25.714285714285715, "aux_brier/n_groups": 6.071428571428571, "aux_brier/n_step_records": 6.428571428571429, "calib/answer_extract_rate": 0.109375, "calib/auroc": 0.8717948717948718, "calib/avg_num_step_conf": 0.3828125, "calib/ece": 0.706425, "calib/final_conf_rate": 0.0625, "calib/format_rate": 0.04296875, "calib/frac_conf_gt_0.9": 0.875, "calib/gap": 0.10593846153846154, "calib/mean_conf": 0.893925, "calib/mu_c": 0.98, "calib/mu_w": 0.8740615384615384, "calib/nonempty_final_conf_rate": 0.0625, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.706425, "calib/std_conf": 0.2299773562223029, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 3018.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 601.28515625, "completions/mean_terminated_length": 652.2415161132812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0064, "grad_norm": 0.0033739216160029173, "learning_rate": 1.5e-06, "loss": -0.0025, "num_tokens": 1631845.0, "reward": 0.03785628452897072, "reward_std": 0.05977939814329147, "rewards/accuracy_reward_step": 0.01171875, "rewards/final_brier_reward_step": 0.018612641841173172, "rewards/format_reward_step": 0.04296875, "step": 6 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": -2.053478910722463e-09, "aux_brier/mean_group_std": 0.019962047751666122, "aux_brier/mean_r": 0.48358245428526425, "aux_brier/n_active_tok": 19.80952380952381, "aux_brier/n_groups": 4.523809523809524, "aux_brier/n_step_records": 4.9523809523809526, "calib/answer_extract_rate": 0.09765625, "calib/auroc": 0.45999999999999996, "calib/avg_num_step_conf": 0.4140625, "calib/ece": 0.7154999999999998, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.0625, "calib/frac_conf_gt_0.9": 0.9, "calib/gap": -0.0019999999999997797, "calib/mean_conf": 0.9654999999999999, "calib/mu_c": 0.9640000000000001, "calib/mu_w": 0.9659999999999999, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.12890625, "calib/nonempty_step_conf_rate": 0.09375, "calib/pce": 0.7154999999999998, "calib/std_conf": 0.03943031828428475, "calib/step_conf_rate": 0.09375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 680.328125, "completions/mean_terminated_length": 741.1233520507812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.007466666666666667, "grad_norm": 0.09639103710651398, "learning_rate": 1.75e-06, "loss": 0.0245, "num_tokens": 1913433.0, "reward": 0.055225878953933716, "reward_std": 0.10832781344652176, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.017778515815734863, "rewards/format_reward_step": 0.0625, "step": 7 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": -1.7644980783160236e-06, "aux_brier/mean_group_std": 0.010382280017897379, "aux_brier/mean_r": 0.5579407203866258, "aux_brier/n_active_tok": 24.75, "aux_brier/n_groups": 5.125, "aux_brier/n_step_records": 6.1875, "calib/answer_extract_rate": 0.1015625, "calib/auroc": 0.5654761904761905, "calib/avg_num_step_conf": 0.38671875, "calib/ece": 0.6050000000000001, "calib/final_conf_rate": 0.078125, "calib/format_rate": 0.05859375, "calib/frac_conf_gt_0.9": 0.65, "calib/gap": -1.1102230246251565e-16, "calib/mean_conf": 0.8099999999999998, "calib/mu_c": 0.81, "calib/mu_w": 0.8100000000000002, "calib/nonempty_final_conf_rate": 0.078125, "calib/nonempty_reasoning_rate": 0.1171875, "calib/nonempty_step_conf_rate": 0.08203125, "calib/pce": 0.5575000000000001, "calib/std_conf": 0.27910571473905726, "calib/step_conf_rate": 0.08203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 3065.0, "completions/max_terminated_length": 3065.0, "completions/mean_length": 652.69921875, "completions/mean_terminated_length": 711.0255126953125, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.008533333333333334, "grad_norm": 0.005197752732783556, "learning_rate": 2.0000000000000003e-06, "loss": 0.0113, "num_tokens": 2187036.0, "reward": 0.059211522340774536, "reward_std": 0.130070760846138, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.025908593088388443, "rewards/format_reward_step": 0.05859375, "step": 8 }, { "aux_brier/lambda": 0.05, "aux_brier/loss": 5.528596944351158e-08, "aux_brier/mean_group_std": 0.03271103883501513, "aux_brier/mean_r": 0.49176046228937714, "aux_brier/n_active_tok": 22.46153846153846, "aux_brier/n_groups": 4.923076923076923, "aux_brier/n_step_records": 5.615384615384615, "calib/answer_extract_rate": 0.06640625, "calib/auroc": 0.9090909090909091, "calib/avg_num_step_conf": 0.28515625, "calib/ece": 0.7175, "calib/final_conf_rate": 0.046875, "calib/format_rate": 0.03125, "calib/frac_conf_gt_0.9": 0.5833333333333334, "calib/gap": 0.2063636363636363, "calib/mean_conf": 0.8008333333333333, "calib/mu_c": 0.99, "calib/mu_w": 0.7836363636363637, "calib/nonempty_final_conf_rate": 0.046875, "calib/nonempty_reasoning_rate": 0.0859375, "calib/nonempty_step_conf_rate": 0.05859375, "calib/pce": 0.7175, "calib/std_conf": 0.2886017536714256, "calib/step_conf_rate": 0.05859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09765625, "completions/max_length": 2927.0, "completions/max_terminated_length": 2927.0, "completions/mean_length": 663.49609375, "completions/mean_terminated_length": 735.3030395507812, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.0096, "grad_norm": 0.1932460218667984, "learning_rate": 2.25e-06, "loss": 0.0067, "num_tokens": 2464427.0, "reward": 0.0225673820823431, "reward_std": 0.06383019685745239, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.01214453112334013, "rewards/format_reward_step": 0.03125, "step": 9 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": -6.086005588379351e-09, "aux_brier/mean_group_std": 0.04457763181302451, "aux_brier/mean_r": 0.4550933216206302, "aux_brier/n_active_tok": 24.75, "aux_brier/n_groups": 5.0, "aux_brier/n_step_records": 6.1875, "calib/answer_extract_rate": 0.08984375, "calib/auroc": 0.8529411764705882, "calib/avg_num_step_conf": 0.40234375, "calib/ece": 0.8458333333333334, "calib/final_conf_rate": 0.0703125, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.7777777777777778, "calib/gap": 0.09382352941176475, "calib/mean_conf": 0.901388888888889, "calib/mu_c": 0.99, "calib/mu_w": 0.8961764705882352, "calib/nonempty_final_conf_rate": 0.0703125, "calib/nonempty_reasoning_rate": 0.11328125, "calib/nonempty_step_conf_rate": 0.08984375, "calib/pce": 0.8458333333333334, "calib/std_conf": 0.2220473270095787, "calib/step_conf_rate": 0.08984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2955.0, "completions/max_terminated_length": 2955.0, "completions/mean_length": 673.3046875, "completions/mean_terminated_length": 727.2826538085938, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.010666666666666666, "grad_norm": 0.07814536243677139, "learning_rate": 2.5e-06, "loss": 0.0165, "num_tokens": 2743593.0, "reward": 0.039519019424915314, "reward_std": 0.08574040234088898, "rewards/accuracy_reward_step": 0.00390625, "rewards/final_brier_reward_step": 0.009638573974370956, "rewards/format_reward_step": 0.06640625, "step": 10 }, { "aux_brier/lambda": 0.05000000000000001, "aux_brier/loss": 2.290517282721978e-08, "aux_brier/mean_group_std": 0.04705990808180376, "aux_brier/mean_r": 0.43014223031838883, "aux_brier/n_active_tok": 27.238095238095237, "aux_brier/n_groups": 5.619047619047619, "aux_brier/n_step_records": 6.809523809523809, "calib/answer_extract_rate": 0.1171875, "calib/auroc": 0.5625, "calib/avg_num_step_conf": 0.60546875, "calib/ece": 0.6345000000000001, "calib/final_conf_rate": 0.0859375, "calib/format_rate": 0.06640625, "calib/frac_conf_gt_0.9": 0.8181818181818182, "calib/gap": 0.07004166666666656, "calib/mean_conf": 0.9072272727272727, "calib/mu_c": 0.9581666666666666, "calib/mu_w": 0.888125, "calib/nonempty_final_conf_rate": 0.0859375, "calib/nonempty_reasoning_rate": 0.15234375, "calib/nonempty_step_conf_rate": 0.12109375, "calib/pce": 0.6345000000000001, "calib/std_conf": 0.20186087283936693, "calib/step_conf_rate": 0.12109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08203125, "completions/max_length": 2868.0, "completions/max_terminated_length": 2868.0, "completions/mean_length": 722.78515625, "completions/mean_terminated_length": 787.3744506835938, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.011733333333333333, "grad_norm": 0.027079828083515167, "learning_rate": 2.7500000000000004e-06, "loss": 0.0399, "num_tokens": 3033106.0, "reward": 0.06367333978414536, "reward_std": 0.16068580746650696, "rewards/accuracy_reward_step": 0.0234375, "rewards/final_brier_reward_step": 0.028130855411291122, "rewards/format_reward_step": 0.06640625, "step": 11 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 3.784316033872415e-08, "aux_brier/mean_group_std": 0.050264136058727066, "aux_brier/mean_r": 0.51797735672114, "aux_brier/n_active_tok": 26.72, "aux_brier/n_groups": 5.0, "aux_brier/n_step_records": 6.68, "calib/answer_extract_rate": 0.16015625, "calib/auroc": 0.5133333333333333, "calib/avg_num_step_conf": 0.66015625, "calib/ece": 0.6474705882352942, "calib/final_conf_rate": 0.1328125, "calib/format_rate": 0.10546875, "calib/frac_conf_gt_0.9": 0.7352941176470589, "calib/gap": 0.0350666666666668, "calib/mean_conf": 0.8868823529411765, "calib/mu_c": 0.9126666666666667, "calib/mu_w": 0.8775999999999999, "calib/nonempty_final_conf_rate": 0.1328125, "calib/nonempty_reasoning_rate": 0.1875, "calib/nonempty_step_conf_rate": 0.14453125, "calib/pce": 0.6348235294117648, "calib/std_conf": 0.175223011634398, "calib/step_conf_rate": 0.14453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05078125, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 598.9453125, "completions/mean_terminated_length": 630.9876098632812, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0128, "grad_norm": 0.0846952348947525, "learning_rate": 3e-06, "loss": 0.0453, "num_tokens": 3290612.0, "reward": 0.09865903109312057, "reward_std": 0.1780010312795639, "rewards/accuracy_reward_step": 0.03515625, "rewards/final_brier_reward_step": 0.043073609471321106, "rewards/format_reward_step": 0.10546875, "step": 12 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 1.8118686498007533e-07, "aux_brier/mean_group_std": 0.035596244655370236, "aux_brier/mean_r": 0.43525204805498086, "aux_brier/n_active_tok": 26.434782608695652, "aux_brier/n_groups": 5.130434782608695, "aux_brier/n_step_records": 6.608695652173913, "calib/answer_extract_rate": 0.15625, "calib/auroc": 0.40384615384615385, "calib/avg_num_step_conf": 0.6015625, "calib/ece": 0.7746896551724138, "calib/final_conf_rate": 0.11328125, "calib/format_rate": 0.09375, "calib/frac_conf_gt_0.9": 0.6896551724137931, "calib/gap": 0.020666666666666833, "calib/mean_conf": 0.8781379310344828, "calib/mu_c": 0.8966666666666668, "calib/mu_w": 0.876, "calib/nonempty_final_conf_rate": 0.11328125, "calib/nonempty_reasoning_rate": 0.17578125, "calib/nonempty_step_conf_rate": 0.125, "calib/pce": 0.7746896551724138, "calib/std_conf": 0.18809896957277586, "calib/step_conf_rate": 0.125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 3001.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 633.203125, "completions/mean_terminated_length": 683.9661865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.013866666666666666, "grad_norm": 0.008228050544857979, "learning_rate": 3.2500000000000002e-06, "loss": 0.0405, "num_tokens": 3557304.0, "reward": 0.07202839851379395, "reward_std": 0.14977121353149414, "rewards/accuracy_reward_step": 0.01953125, "rewards/final_brier_reward_step": 0.022488608956336975, "rewards/format_reward_step": 0.09375, "step": 13 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 6.9190393318145334e-09, "aux_brier/mean_group_std": 0.07575889127849744, "aux_brier/mean_r": 0.4560538181934339, "aux_brier/n_active_tok": 31.652173913043477, "aux_brier/n_groups": 5.391304347826087, "aux_brier/n_step_records": 7.913043478260869, "calib/answer_extract_rate": 0.1796875, "calib/auroc": 0.46296296296296297, "calib/avg_num_step_conf": 0.7265625, "calib/ece": 0.5727500000000001, "calib/final_conf_rate": 0.15625, "calib/format_rate": 0.12890625, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.07968660968660957, "calib/mean_conf": 0.8977499999999999, "calib/mu_c": 0.9515384615384613, "calib/mu_w": 0.8718518518518518, "calib/nonempty_final_conf_rate": 0.15625, "calib/nonempty_reasoning_rate": 0.203125, "calib/nonempty_step_conf_rate": 0.1640625, "calib/pce": 0.5727500000000001, "calib/std_conf": 0.2133833112031023, "calib/step_conf_rate": 0.1640625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07421875, "completions/max_length": 2915.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 589.546875, "completions/mean_terminated_length": 636.8101196289062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.014933333333333333, "grad_norm": 0.03263997659087181, "learning_rate": 3.5e-06, "loss": 0.0416, "num_tokens": 3813628.0, "reward": 0.12998661398887634, "reward_std": 0.2102944701910019, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.05900898203253746, "rewards/format_reward_step": 0.12890625, "step": 14 }, { "aux_brier/lambda": 0.05000000000000002, "aux_brier/loss": 6.183022988048847e-08, "aux_brier/mean_group_std": 0.07469891433623198, "aux_brier/mean_r": 0.4410105300338308, "aux_brier/n_active_tok": 30.333333333333332, "aux_brier/n_groups": 5.166666666666667, "aux_brier/n_step_records": 7.583333333333333, "calib/answer_extract_rate": 0.22265625, "calib/auroc": 0.3846153846153846, "calib/avg_num_step_conf": 0.765625, "calib/ece": 0.6639130434782607, "calib/final_conf_rate": 0.1796875, "calib/format_rate": 0.1328125, "calib/frac_conf_gt_0.9": 0.8478260869565217, "calib/gap": -0.012867132867132813, "calib/mean_conf": 0.9400000000000001, "calib/mu_c": 0.9307692307692308, "calib/mu_w": 0.9436363636363636, "calib/nonempty_final_conf_rate": 0.1796875, "calib/nonempty_reasoning_rate": 0.2578125, "calib/nonempty_step_conf_rate": 0.1796875, "calib/pce": 0.6606521739130433, "calib/std_conf": 0.07723481981143186, "calib/step_conf_rate": 0.1796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 3062.0, "completions/max_terminated_length": 3062.0, "completions/mean_length": 606.9140625, "completions/mean_terminated_length": 652.815185546875, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.016, "grad_norm": 0.08959263563156128, "learning_rate": 3.7500000000000005e-06, "loss": 0.0954, "num_tokens": 4076878.0, "reward": 0.12888270616531372, "reward_std": 0.24933215975761414, "rewards/accuracy_reward_step": 0.05078125, "rewards/final_brier_reward_step": 0.04678086191415787, "rewards/format_reward_step": 0.1328125, "step": 15 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.0848133385239172e-07, "aux_brier/mean_group_std": 0.09843642749678756, "aux_brier/mean_r": 0.44798128896741424, "aux_brier/n_active_tok": 47.714285714285715, "aux_brier/n_groups": 8.0, "aux_brier/n_step_records": 11.928571428571429, "calib/answer_extract_rate": 0.2421875, "calib/auroc": 0.5825358851674641, "calib/avg_num_step_conf": 1.31640625, "calib/ece": 0.6845170068027212, "calib/final_conf_rate": 0.19140625, "calib/format_rate": 0.171875, "calib/frac_conf_gt_0.9": 0.8367346938775511, "calib/gap": 0.06927113237639537, "calib/mean_conf": 0.9090068027210885, "calib/mu_c": 0.9627272727272725, "calib/mu_w": 0.8934561403508772, "calib/nonempty_final_conf_rate": 0.19140625, "calib/nonempty_reasoning_rate": 0.26953125, "calib/nonempty_step_conf_rate": 0.21484375, "calib/pce": 0.6845170068027212, "calib/std_conf": 0.1927121205450507, "calib/step_conf_rate": 0.21484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.046875, "completions/max_length": 2984.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 639.68359375, "completions/mean_terminated_length": 671.1434326171875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.017066666666666667, "grad_norm": 0.10603635013103485, "learning_rate": 4.000000000000001e-06, "loss": 0.0904, "num_tokens": 4349485.0, "reward": 0.14869771897792816, "reward_std": 0.2691415250301361, "rewards/accuracy_reward_step": 0.046875, "rewards/final_brier_reward_step": 0.06354087591171265, "rewards/format_reward_step": 0.171875, "step": 16 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.634651545818656e-08, "aux_brier/mean_group_std": 0.16041422243976916, "aux_brier/mean_r": 0.4858352168629629, "aux_brier/n_active_tok": 59.06666666666667, "aux_brier/n_groups": 7.033333333333333, "aux_brier/n_step_records": 14.766666666666667, "calib/answer_extract_rate": 0.3203125, "calib/auroc": 0.5146750524109014, "calib/avg_num_step_conf": 1.73828125, "calib/ece": 0.6458450704225353, "calib/final_conf_rate": 0.27734375, "calib/format_rate": 0.23828125, "calib/frac_conf_gt_0.9": 0.7183098591549296, "calib/gap": 0.03657232704402513, "calib/mean_conf": 0.8993661971830985, "calib/mu_c": 0.9266666666666666, "calib/mu_w": 0.8900943396226415, "calib/nonempty_final_conf_rate": 0.27734375, "calib/nonempty_reasoning_rate": 0.375, "calib/nonempty_step_conf_rate": 0.3203125, "calib/pce": 0.6458450704225353, "calib/std_conf": 0.16196837105918782, "calib/step_conf_rate": 0.3203125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0703125, "completions/max_length": 2940.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 548.0078125, "completions/mean_terminated_length": 589.4537963867188, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.018133333333333335, "grad_norm": 0.16882355511188507, "learning_rate": 4.25e-06, "loss": 0.0821, "num_tokens": 4593303.0, "reward": 0.2112351357936859, "reward_std": 0.29052725434303284, "rewards/accuracy_reward_step": 0.0703125, "rewards/final_brier_reward_step": 0.08712802827358246, "rewards/format_reward_step": 0.23828125, "step": 17 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.5866668310020165e-09, "aux_brier/mean_group_std": 0.10337834969695207, "aux_brier/mean_r": 0.4454792474401508, "aux_brier/n_active_tok": 41.42857142857143, "aux_brier/n_groups": 5.714285714285714, "aux_brier/n_step_records": 10.357142857142858, "calib/answer_extract_rate": 0.28515625, "calib/auroc": 0.4962121212121212, "calib/avg_num_step_conf": 1.1484375, "calib/ece": 0.5890161290322582, "calib/final_conf_rate": 0.2421875, "calib/format_rate": 0.19140625, "calib/frac_conf_gt_0.9": 0.7096774193548387, "calib/gap": 0.05194191919191926, "calib/mean_conf": 0.8686935483870968, "calib/mu_c": 0.9055555555555556, "calib/mu_w": 0.8536136363636363, "calib/nonempty_final_conf_rate": 0.2421875, "calib/nonempty_reasoning_rate": 0.33984375, "calib/nonempty_step_conf_rate": 0.265625, "calib/pce": 0.5836935483870969, "calib/std_conf": 0.22807947035427445, "calib/step_conf_rate": 0.265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05859375, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 574.22265625, "completions/mean_terminated_length": 609.9627075195312, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.0192, "grad_norm": 0.29395243525505066, "learning_rate": 4.5e-06, "loss": 0.0954, "num_tokens": 4851024.0, "reward": 0.1949663907289505, "reward_std": 0.2701266407966614, "rewards/accuracy_reward_step": 0.078125, "rewards/final_brier_reward_step": 0.08455304056406021, "rewards/format_reward_step": 0.19140625, "step": 18 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.71914328237633e-09, "aux_brier/mean_group_std": 0.18596475159403847, "aux_brier/mean_r": 0.4801690742344864, "aux_brier/n_active_tok": 89.0, "aux_brier/n_groups": 8.46875, "aux_brier/n_step_records": 22.25, "calib/answer_extract_rate": 0.56640625, "calib/auroc": 0.6119100294985251, "calib/avg_num_step_conf": 2.87109375, "calib/ece": 0.7139416058394159, "calib/final_conf_rate": 0.53515625, "calib/format_rate": 0.44921875, "calib/frac_conf_gt_0.9": 0.7372262773722628, "calib/gap": 0.07784660766961649, "calib/mean_conf": 0.8891240875912408, "calib/mu_c": 0.9533333333333333, "calib/mu_w": 0.8754867256637168, "calib/nonempty_final_conf_rate": 0.53515625, "calib/nonempty_reasoning_rate": 0.66796875, "calib/nonempty_step_conf_rate": 0.5703125, "calib/pce": 0.7139416058394159, "calib/std_conf": 0.1940820597482887, "calib/step_conf_rate": 0.5703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2724.0, "completions/max_terminated_length": 2724.0, "completions/mean_length": 425.109375, "completions/mean_terminated_length": 435.31201171875, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.020266666666666665, "grad_norm": 0.08734527975320816, "learning_rate": 4.75e-06, "loss": 0.1709, "num_tokens": 5064612.0, "reward": 0.35721492767333984, "reward_std": 0.41247040033340454, "rewards/accuracy_reward_step": 0.09375, "rewards/final_brier_reward_step": 0.15542227029800415, "rewards/format_reward_step": 0.44921875, "step": 19 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.3638003304737865e-08, "aux_brier/mean_group_std": 0.19777696493153557, "aux_brier/mean_r": 0.51686083569449, "aux_brier/n_active_tok": 100.125, "aux_brier/n_groups": 8.21875, "aux_brier/n_step_records": 25.03125, "calib/answer_extract_rate": 0.67578125, "calib/auroc": 0.5950396825396825, "calib/avg_num_step_conf": 3.234375, "calib/ece": 0.6353620481927711, "calib/final_conf_rate": 0.6484375, "calib/format_rate": 0.515625, "calib/frac_conf_gt_0.9": 0.6807228915662651, "calib/gap": 0.07038412698412688, "calib/mean_conf": 0.8763259036144578, "calib/mu_c": 0.92975, "calib/mu_w": 0.8593658730158731, "calib/nonempty_final_conf_rate": 0.6484375, "calib/nonempty_reasoning_rate": 0.78515625, "calib/nonempty_step_conf_rate": 0.66796875, "calib/pce": 0.6353620481927711, "calib/std_conf": 0.2056600185856877, "calib/step_conf_rate": 0.66796875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2478.0, "completions/max_terminated_length": 2478.0, "completions/mean_length": 361.2421875, "completions/mean_terminated_length": 366.9762268066406, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.021333333333333333, "grad_norm": 0.18377214670181274, "learning_rate": 5e-06, "loss": 0.085, "num_tokens": 5261962.0, "reward": 0.46865132451057434, "reward_std": 0.5203608274459839, "rewards/accuracy_reward_step": 0.15625, "rewards/final_brier_reward_step": 0.21835541725158691, "rewards/format_reward_step": 0.515625, "step": 20 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.4970630329153023e-08, "aux_brier/mean_group_std": 0.22389224604257676, "aux_brier/mean_r": 0.5237990710747721, "aux_brier/n_active_tok": 132.125, "aux_brier/n_groups": 10.59375, "aux_brier/n_step_records": 33.03125, "calib/answer_extract_rate": 0.796875, "calib/auroc": 0.5736507936507937, "calib/avg_num_step_conf": 4.2265625, "calib/ece": 0.6526098958333334, "calib/final_conf_rate": 0.75, "calib/format_rate": 0.6640625, "calib/frac_conf_gt_0.9": 0.7395833333333334, "calib/gap": 0.07685742857142852, "calib/mean_conf": 0.8713598958333334, "calib/mu_c": 0.9314047619047618, "calib/mu_w": 0.8545473333333333, "calib/nonempty_final_conf_rate": 0.75, "calib/nonempty_reasoning_rate": 0.87109375, "calib/nonempty_step_conf_rate": 0.7734375, "calib/pce": 0.6526098958333334, "calib/std_conf": 0.22478363139637003, "calib/step_conf_rate": 0.7734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3045.0, "completions/max_terminated_length": 3045.0, "completions/mean_length": 367.05859375, "completions/mean_terminated_length": 368.498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 24.0, "epoch": 0.0224, "grad_norm": 0.19530241191387177, "learning_rate": 4.9722222222222224e-06, "loss": 0.0974, "num_tokens": 5458889.0, "reward": 0.563724935054779, "reward_std": 0.5039910078048706, "rewards/accuracy_reward_step": 0.16796875, "rewards/final_brier_reward_step": 0.25489968061447144, "rewards/format_reward_step": 0.6640625, "step": 21 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.0222760971068112e-09, "aux_brier/mean_group_std": 0.20883598233943224, "aux_brier/mean_r": 0.498378047303362, "aux_brier/n_active_tok": 126.875, "aux_brier/n_groups": 8.25, "aux_brier/n_step_records": 31.71875, "calib/answer_extract_rate": 0.83984375, "calib/auroc": 0.46816097809475293, "calib/avg_num_step_conf": 4.12890625, "calib/ece": 0.5995587962962964, "calib/final_conf_rate": 0.84375, "calib/format_rate": 0.74609375, "calib/frac_conf_gt_0.9": 0.7083333333333334, "calib/gap": 0.028774172185430325, "calib/mean_conf": 0.9004847222222223, "calib/mu_c": 0.9206, "calib/mu_w": 0.8918258278145696, "calib/nonempty_final_conf_rate": 0.84375, "calib/nonempty_reasoning_rate": 0.91015625, "calib/nonempty_step_conf_rate": 0.84765625, "calib/pce": 0.5995587962962964, "calib/std_conf": 0.16218625466350314, "calib/step_conf_rate": 0.84765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1795.0, "completions/max_terminated_length": 1795.0, "completions/mean_length": 299.58984375, "completions/mean_terminated_length": 303.1423034667969, "completions/min_length": 0.0, "completions/min_terminated_length": 3.0, "epoch": 0.023466666666666667, "grad_norm": 0.11125664412975311, "learning_rate": 4.944444444444445e-06, "loss": 0.067, "num_tokens": 5637400.0, "reward": 0.7065411806106567, "reward_std": 0.551918625831604, "rewards/accuracy_reward_step": 0.25390625, "rewards/final_brier_reward_step": 0.3183521032333374, "rewards/format_reward_step": 0.74609375, "step": 22 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.4593973210463851e-08, "aux_brier/mean_group_std": 0.21091686692543818, "aux_brier/mean_r": 0.4781024369606529, "aux_brier/n_active_tok": 128.25, "aux_brier/n_groups": 7.5, "aux_brier/n_step_records": 32.0625, "calib/answer_extract_rate": 0.87890625, "calib/auroc": 0.6023887300939159, "calib/avg_num_step_conf": 4.0859375, "calib/ece": 0.6349454545454545, "calib/final_conf_rate": 0.859375, "calib/format_rate": 0.8125, "calib/frac_conf_gt_0.9": 0.6954545454545454, "calib/gap": 0.014763576970192038, "calib/mean_conf": 0.9009454545454545, "calib/mu_c": 0.9115483870967743, "calib/mu_w": 0.8967848101265823, "calib/nonempty_final_conf_rate": 0.859375, "calib/nonempty_reasoning_rate": 0.95703125, "calib/nonempty_step_conf_rate": 0.9140625, "calib/pce": 0.6270363636363635, "calib/std_conf": 0.15172348271316333, "calib/step_conf_rate": 0.9140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2580.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 292.29296875, "completions/mean_terminated_length": 293.4392395019531, "completions/min_length": 0.0, "completions/min_terminated_length": 29.0, "epoch": 0.024533333333333334, "grad_norm": 0.2156190723180771, "learning_rate": 4.9166666666666665e-06, "loss": 0.0804, "num_tokens": 5816163.0, "reward": 0.7280130386352539, "reward_std": 0.4908643662929535, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.31830233335494995, "rewards/format_reward_step": 0.8125, "step": 23 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.842990707746967e-09, "aux_brier/mean_group_std": 0.21221712414175256, "aux_brier/mean_r": 0.5030739964811978, "aux_brier/n_active_tok": 164.625, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 41.15625, "calib/answer_extract_rate": 0.90234375, "calib/auroc": 0.5535012285012284, "calib/avg_num_step_conf": 5.18359375, "calib/ece": 0.7131834061135371, "calib/final_conf_rate": 0.89453125, "calib/format_rate": 0.84765625, "calib/frac_conf_gt_0.9": 0.7248908296943232, "calib/gap": 0.02142678132678133, "calib/mean_conf": 0.8974628820960697, "calib/mu_c": 0.9147727272727273, "calib/mu_w": 0.893345945945946, "calib/nonempty_final_conf_rate": 0.89453125, "calib/nonempty_reasoning_rate": 0.96484375, "calib/nonempty_step_conf_rate": 0.9375, "calib/pce": 0.7092532751091704, "calib/std_conf": 0.1666275680637964, "calib/step_conf_rate": 0.9375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2036.0, "completions/max_terminated_length": 2036.0, "completions/mean_length": 305.6015625, "completions/mean_terminated_length": 305.6015625, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 0.0256, "grad_norm": 1.4956940412521362, "learning_rate": 4.888888888888889e-06, "loss": 0.0533, "num_tokens": 5998909.0, "reward": 0.6711543798446655, "reward_std": 0.42066627740859985, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.2705550789833069, "rewards/format_reward_step": 0.84765625, "step": 24 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.9371144266709948e-08, "aux_brier/mean_group_std": 0.18362094956793468, "aux_brier/mean_r": 0.46135988830777225, "aux_brier/n_active_tok": 157.5, "aux_brier/n_groups": 9.125, "aux_brier/n_step_records": 39.375, "calib/answer_extract_rate": 0.91796875, "calib/auroc": 0.5193012116089039, "calib/avg_num_step_conf": 5.0078125, "calib/ece": 0.6337889367816092, "calib/final_conf_rate": 0.90625, "calib/format_rate": 0.8515625, "calib/frac_conf_gt_0.9": 0.7543103448275862, "calib/gap": 0.05237352305813836, "calib/mean_conf": 0.9053406609195404, "calib/mu_c": 0.9434920634920634, "calib/mu_w": 0.891118540433925, "calib/nonempty_final_conf_rate": 0.90625, "calib/nonempty_reasoning_rate": 0.97265625, "calib/nonempty_step_conf_rate": 0.94140625, "calib/pce": 0.6337889367816092, "calib/std_conf": 0.17423891821746895, "calib/step_conf_rate": 0.94140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1640.0, "completions/max_terminated_length": 1640.0, "completions/mean_length": 296.6328125, "completions/mean_terminated_length": 297.79608154296875, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.02666666666666667, "grad_norm": 0.8203977942466736, "learning_rate": 4.861111111111111e-06, "loss": 0.061, "num_tokens": 6178071.0, "reward": 0.7586723566055298, "reward_std": 0.47993963956832886, "rewards/accuracy_reward_step": 0.25, "rewards/final_brier_reward_step": 0.33156442642211914, "rewards/format_reward_step": 0.8515625, "step": 25 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.1095086800694265e-08, "aux_brier/mean_group_std": 0.18773403585079074, "aux_brier/mean_r": 0.48856257711885304, "aux_brier/n_active_tok": 166.5, "aux_brier/n_groups": 8.71875, "aux_brier/n_step_records": 41.625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4795587280986372, "calib/avg_num_step_conf": 5.3046875, "calib/ece": 0.7157371120107964, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.92578125, "calib/frac_conf_gt_0.9": 0.7408906882591093, "calib/gap": 0.020350234335568484, "calib/mean_conf": 0.9019179487179487, "calib/mu_c": 0.9184782608695651, "calib/mu_w": 0.8981280265339966, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.97265625, "calib/pce": 0.71571012145749, "calib/std_conf": 0.17176024475891646, "calib/step_conf_rate": 0.97265625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2688.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 310.42578125, "completions/mean_terminated_length": 310.42578125, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.027733333333333332, "grad_norm": 0.03431373089551926, "learning_rate": 4.833333333333333e-06, "loss": 0.1124, "num_tokens": 6362780.0, "reward": 0.7162113189697266, "reward_std": 0.38292473554611206, "rewards/accuracy_reward_step": 0.1796875, "rewards/final_brier_reward_step": 0.29453274607658386, "rewards/format_reward_step": 0.92578125, "step": 26 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.646272480478663e-08, "aux_brier/mean_group_std": 0.18948043789191105, "aux_brier/mean_r": 0.47236006589088697, "aux_brier/n_active_tok": 163.875, "aux_brier/n_groups": 9.09375, "aux_brier/n_step_records": 40.96875, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.4680213903743315, "calib/avg_num_step_conf": 5.17578125, "calib/ece": 0.705890576652602, "calib/final_conf_rate": 0.92578125, "calib/format_rate": 0.88671875, "calib/frac_conf_gt_0.9": 0.7552742616033755, "calib/gap": -0.015956745098039127, "calib/mean_conf": 0.8970630098452883, "calib/mu_c": 0.8844726666666667, "calib/mu_w": 0.9004294117647058, "calib/nonempty_final_conf_rate": 0.92578125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.96875, "calib/pce": 0.6959915611814347, "calib/std_conf": 0.19202561087719486, "calib/step_conf_rate": 0.96875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1533.0, "completions/max_terminated_length": 1533.0, "completions/mean_length": 285.203125, "completions/mean_terminated_length": 285.203125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.0288, "grad_norm": 1.375303030014038, "learning_rate": 4.805555555555556e-06, "loss": 0.0697, "num_tokens": 6541008.0, "reward": 0.715735673904419, "reward_std": 0.42697474360466003, "rewards/accuracy_reward_step": 0.19921875, "rewards/final_brier_reward_step": 0.29263025522232056, "rewards/format_reward_step": 0.88671875, "step": 27 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.751273354757783e-09, "aux_brier/mean_group_std": 0.20084937529796282, "aux_brier/mean_r": 0.46332548943538576, "aux_brier/n_active_tok": 171.0, "aux_brier/n_groups": 9.8125, "aux_brier/n_step_records": 42.75, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5777543061423466, "calib/avg_num_step_conf": 5.41015625, "calib/ece": 0.6407212851405623, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.93359375, "calib/frac_conf_gt_0.9": 0.7349397590361446, "calib/gap": 0.031039356516087135, "calib/mean_conf": 0.9062666666666667, "calib/mu_c": 0.9288294117647059, "calib/mu_w": 0.8977900552486188, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98046875, "calib/pce": 0.6369477911646586, "calib/std_conf": 0.156506920864373, "calib/step_conf_rate": 0.98046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 903.0, "completions/max_terminated_length": 903.0, "completions/mean_length": 284.25, "completions/mean_terminated_length": 285.3647155761719, "completions/min_length": 0.0, "completions/min_terminated_length": 5.0, "epoch": 0.029866666666666666, "grad_norm": 0.5083498358726501, "learning_rate": 4.777777777777778e-06, "loss": 0.021, "num_tokens": 6720720.0, "reward": 0.823421835899353, "reward_std": 0.4264180362224579, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.36399996280670166, "rewards/format_reward_step": 0.93359375, "step": 28 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.350772413534097e-09, "aux_brier/mean_group_std": 0.19330138283500833, "aux_brier/mean_r": 0.4670754356864333, "aux_brier/n_active_tok": 175.25, "aux_brier/n_groups": 9.3125, "aux_brier/n_step_records": 43.8125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5134239130434782, "calib/avg_num_step_conf": 5.55859375, "calib/ece": 0.7283373983739839, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.7845528455284553, "calib/gap": 0.029248695652173917, "calib/mean_conf": 0.915329268292683, "calib/mu_c": 0.9391086956521739, "calib/mu_w": 0.90986, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.7283373983739839, "calib/std_conf": 0.14145036087094834, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1416.0, "completions/max_terminated_length": 1416.0, "completions/mean_length": 291.09375, "completions/mean_terminated_length": 291.09375, "completions/min_length": 81.0, "completions/min_terminated_length": 81.0, "epoch": 0.030933333333333334, "grad_norm": 0.9130586385726929, "learning_rate": 4.75e-06, "loss": -0.0081, "num_tokens": 6902368.0, "reward": 0.7286772131919861, "reward_std": 0.37026363611221313, "rewards/accuracy_reward_step": 0.18359375, "rewards/final_brier_reward_step": 0.2897088825702667, "rewards/format_reward_step": 0.9453125, "step": 29 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.160996042359752e-09, "aux_brier/mean_group_std": 0.19545460216574173, "aux_brier/mean_r": 0.4584137314399851, "aux_brier/n_active_tok": 183.375, "aux_brier/n_groups": 10.84375, "aux_brier/n_step_records": 45.84375, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5740104166666666, "calib/avg_num_step_conf": 5.86328125, "calib/ece": 0.7096774193548387, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.9453125, "calib/frac_conf_gt_0.9": 0.75, "calib/gap": 0.03160000000000007, "calib/mean_conf": 0.902016129032258, "calib/mu_c": 0.9275000000000001, "calib/mu_w": 0.8959, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.7090725806451613, "calib/std_conf": 0.1675775210053721, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1568.0, "completions/max_terminated_length": 1568.0, "completions/mean_length": 298.3984375, "completions/mean_terminated_length": 299.5686340332031, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.032, "grad_norm": 0.11302852630615234, "learning_rate": 4.722222222222222e-06, "loss": 0.03, "num_tokens": 7085742.0, "reward": 0.7374452948570251, "reward_std": 0.3994256556034088, "rewards/accuracy_reward_step": 0.1875, "rewards/final_brier_reward_step": 0.30915623903274536, "rewards/format_reward_step": 0.9453125, "step": 30 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.083522158251142e-10, "aux_brier/mean_group_std": 0.20664738762094437, "aux_brier/mean_r": 0.5002378306483395, "aux_brier/n_active_tok": 185.625, "aux_brier/n_groups": 10.34375, "aux_brier/n_step_records": 46.40625, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5280216802168021, "calib/avg_num_step_conf": 6.08984375, "calib/ece": 0.710364, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.7, "calib/gap": 0.036683468834688426, "calib/mean_conf": 0.890364, "calib/mu_c": 0.9204444444444445, "calib/mu_w": 0.8837609756097561, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.710364, "calib/std_conf": 0.17630550616472535, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1032.0, "completions/max_terminated_length": 1032.0, "completions/mean_length": 287.5234375, "completions/mean_terminated_length": 288.6510009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.03306666666666667, "grad_norm": 1.7296192646026611, "learning_rate": 4.694444444444445e-06, "loss": 0.0086, "num_tokens": 7265260.0, "reward": 0.7285614013671875, "reward_std": 0.336410254240036, "rewards/accuracy_reward_step": 0.17578125, "rewards/final_brier_reward_step": 0.31268310546875, "rewards/format_reward_step": 0.94921875, "step": 31 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.435025147466522e-08, "aux_brier/mean_group_std": 0.2067278140733199, "aux_brier/mean_r": 0.5350343102531423, "aux_brier/n_active_tok": 174.625, "aux_brier/n_groups": 8.6875, "aux_brier/n_step_records": 43.65625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5011188686000716, "calib/avg_num_step_conf": 5.5234375, "calib/ece": 0.6456521739130435, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.6205533596837944, "calib/gap": 0.022327246688149005, "calib/mean_conf": 0.8709486166007905, "calib/mu_c": 0.8882456140350876, "calib/mu_w": 0.8659183673469386, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.6456521739130435, "calib/std_conf": 0.16714260238987344, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 789.0, "completions/max_terminated_length": 789.0, "completions/mean_length": 273.2890625, "completions/mean_terminated_length": 273.2890625, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.034133333333333335, "grad_norm": 0.33643317222595215, "learning_rate": 4.666666666666667e-06, "loss": 0.0037, "num_tokens": 7441926.0, "reward": 0.8007103204727173, "reward_std": 0.3542967438697815, "rewards/accuracy_reward_step": 0.22265625, "rewards/final_brier_reward_step": 0.3747164011001587, "rewards/format_reward_step": 0.96875, "step": 32 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.842371184061612e-09, "aux_brier/mean_group_std": 0.2071667422683363, "aux_brier/mean_r": 0.5277406418950331, "aux_brier/n_active_tok": 188.5, "aux_brier/n_groups": 10.53125, "aux_brier/n_step_records": 47.125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4771356783919598, "calib/avg_num_step_conf": 5.92578125, "calib/ece": 0.666940562248996, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.642570281124498, "calib/gap": 0.02484522613065321, "calib/mean_conf": 0.8677437751004016, "calib/mu_c": 0.8876, "calib/mu_w": 0.8627547738693467, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.666940562248996, "calib/std_conf": 0.19149247358883403, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1823.0, "completions/max_terminated_length": 1823.0, "completions/mean_length": 312.23046875, "completions/mean_terminated_length": 312.23046875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.0352, "grad_norm": 0.5383950471878052, "learning_rate": 4.638888888888889e-06, "loss": 0.0317, "num_tokens": 7628729.0, "reward": 0.755446195602417, "reward_std": 0.36358797550201416, "rewards/accuracy_reward_step": 0.1953125, "rewards/final_brier_reward_step": 0.3420972228050232, "rewards/format_reward_step": 0.94921875, "step": 33 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.2737829242770449e-08, "aux_brier/mean_group_std": 0.2017562837895255, "aux_brier/mean_r": 0.5390499524522482, "aux_brier/n_active_tok": 181.25, "aux_brier/n_groups": 9.15625, "aux_brier/n_step_records": 45.3125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4530153754291685, "calib/avg_num_step_conf": 5.77734375, "calib/ece": 0.5866932270916336, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.6175298804780877, "calib/gap": 0.0031907747424987365, "calib/mean_conf": 0.8680478087649403, "calib/mu_c": 0.8702597402597402, "calib/mu_w": 0.8670689655172414, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.57398406374502, "calib/std_conf": 0.19062011852590993, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 846.0, "completions/max_terminated_length": 846.0, "completions/mean_length": 274.18359375, "completions/mean_terminated_length": 275.25885009765625, "completions/min_length": 0.0, "completions/min_terminated_length": 97.0, "epoch": 0.03626666666666667, "grad_norm": 0.40328362584114075, "learning_rate": 4.611111111111112e-06, "loss": -0.0135, "num_tokens": 7804032.0, "reward": 0.881942868232727, "reward_std": 0.439346045255661, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.4183965027332306, "rewards/format_reward_step": 0.953125, "step": 34 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.857877070722608e-09, "aux_brier/mean_group_std": 0.21221449500511383, "aux_brier/mean_r": 0.5864900531868034, "aux_brier/n_active_tok": 204.625, "aux_brier/n_groups": 11.9375, "aux_brier/n_step_records": 51.15625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4663071613459879, "calib/avg_num_step_conf": 6.4140625, "calib/ece": 0.626573705179283, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.5338645418326693, "calib/gap": -0.030287316652286345, "calib/mean_conf": 0.8422709163346614, "calib/mu_c": 0.8193442622950821, "calib/mu_w": 0.8496315789473684, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.6129083665338646, "calib/std_conf": 0.1989394748463672, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1683.0, "completions/max_terminated_length": 1683.0, "completions/mean_length": 317.76171875, "completions/mean_terminated_length": 317.76171875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.037333333333333336, "grad_norm": 0.19969779253005981, "learning_rate": 4.583333333333333e-06, "loss": 0.057, "num_tokens": 7994635.0, "reward": 0.828087568283081, "reward_std": 0.31002628803253174, "rewards/accuracy_reward_step": 0.23828125, "rewards/final_brier_reward_step": 0.39828789234161377, "rewards/format_reward_step": 0.98046875, "step": 35 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.909607559580362e-08, "aux_brier/mean_group_std": 0.19722265412808157, "aux_brier/mean_r": 0.6097747510974779, "aux_brier/n_active_tok": 183.25, "aux_brier/n_groups": 9.375, "aux_brier/n_step_records": 45.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.46617479327060163, "calib/avg_num_step_conf": 5.765625, "calib/ece": 0.4968900398406375, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.4581673306772908, "calib/gap": -0.003002594810379011, "calib/mean_conf": 0.8128310756972111, "calib/mu_c": 0.8108333333333335, "calib/mu_w": 0.8138359281437125, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.48752988047808765, "calib/std_conf": 0.20786458008957415, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1956.0, "completions/max_terminated_length": 1956.0, "completions/mean_length": 300.7421875, "completions/mean_terminated_length": 300.7421875, "completions/min_length": 73.0, "completions/min_terminated_length": 73.0, "epoch": 0.0384, "grad_norm": 0.035050421953201294, "learning_rate": 4.555555555555556e-06, "loss": 0.0316, "num_tokens": 8174337.0, "reward": 0.9423432350158691, "reward_std": 0.4082295000553131, "rewards/accuracy_reward_step": 0.3359375, "rewards/final_brier_reward_step": 0.48812299966812134, "rewards/format_reward_step": 0.96875, "step": 36 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.402787857043066e-08, "aux_brier/mean_group_std": 0.21307766927910904, "aux_brier/mean_r": 0.631122288371699, "aux_brier/n_active_tok": 194.125, "aux_brier/n_groups": 11.46875, "aux_brier/n_step_records": 48.53125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.561368778280543, "calib/avg_num_step_conf": 6.23046875, "calib/ece": 0.5385600000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.412, "calib/gap": 0.05445862960568848, "calib/mean_conf": 0.7925599999999999, "calib/mu_c": 0.8322058823529412, "calib/mu_w": 0.7777472527472528, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.52956, "calib/std_conf": 0.23220991882346456, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 898.0, "completions/max_terminated_length": 898.0, "completions/mean_length": 291.6953125, "completions/mean_terminated_length": 292.8392333984375, "completions/min_length": 0.0, "completions/min_terminated_length": 48.0, "epoch": 0.039466666666666664, "grad_norm": 0.8316993713378906, "learning_rate": 4.527777777777778e-06, "loss": 0.0747, "num_tokens": 8356107.0, "reward": 0.8771013617515564, "reward_std": 0.37841999530792236, "rewards/accuracy_reward_step": 0.26953125, "rewards/final_brier_reward_step": 0.4849679470062256, "rewards/format_reward_step": 0.97265625, "step": 37 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.015820735813751e-09, "aux_brier/mean_group_std": 0.1990842058790093, "aux_brier/mean_r": 0.6854511867977533, "aux_brier/n_active_tok": 191.75, "aux_brier/n_groups": 10.125, "aux_brier/n_step_records": 47.9375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4861378977820636, "calib/avg_num_step_conf": 6.0234375, "calib/ece": 0.5195219123505976, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.3904382470119522, "calib/gap": -0.0017807778849243672, "calib/mean_conf": 0.7777689243027889, "calib/mu_c": 0.7764705882352941, "calib/mu_w": 0.7782513661202185, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.5131872509960159, "calib/std_conf": 0.22659220261363155, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1089.0, "completions/max_terminated_length": 1089.0, "completions/mean_length": 287.65625, "completions/mean_terminated_length": 288.7843322753906, "completions/min_length": 0.0, "completions/min_terminated_length": 70.0, "epoch": 0.04053333333333333, "grad_norm": 0.5050545334815979, "learning_rate": 4.5e-06, "loss": 0.0058, "num_tokens": 8536635.0, "reward": 0.8718788623809814, "reward_std": 0.3719131648540497, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.4797031581401825, "rewards/format_reward_step": 0.97265625, "step": 38 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.3672130289021922e-08, "aux_brier/mean_group_std": 0.2077947444626746, "aux_brier/mean_r": 0.7109347300063643, "aux_brier/n_active_tok": 185.75, "aux_brier/n_groups": 10.34375, "aux_brier/n_step_records": 46.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4582189239332097, "calib/avg_num_step_conf": 5.8359375, "calib/ece": 0.4736904761904761, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.3253968253968254, "calib/gap": -0.04231168831168819, "calib/mean_conf": 0.7282142857142857, "calib/mu_c": 0.6988311688311689, "calib/mu_w": 0.7411428571428571, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4481746031746031, "calib/std_conf": 0.25118999661430186, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1725.0, "completions/max_terminated_length": 1725.0, "completions/mean_length": 296.5859375, "completions/mean_terminated_length": 296.5859375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "epoch": 0.0416, "grad_norm": 1.7626452445983887, "learning_rate": 4.472222222222223e-06, "loss": 0.0308, "num_tokens": 8718649.0, "reward": 0.9181327819824219, "reward_std": 0.3791179060935974, "rewards/accuracy_reward_step": 0.30078125, "rewards/final_brier_reward_step": 0.516281247138977, "rewards/format_reward_step": 0.9765625, "step": 39 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.385072284451841e-08, "aux_brier/mean_group_std": 0.17809794199122972, "aux_brier/mean_r": 0.7468761257165805, "aux_brier/n_active_tok": 199.125, "aux_brier/n_groups": 11.28125, "aux_brier/n_step_records": 49.78125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48548223350253805, "calib/avg_num_step_conf": 6.25390625, "calib/ece": 0.4875438596491228, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.2631578947368421, "calib/gap": 0.01573231810490694, "calib/mean_conf": 0.6832523616734143, "calib/mu_c": 0.6958, "calib/mu_w": 0.680067681895093, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.4841835357624831, "calib/std_conf": 0.27774150629818234, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2143.0, "completions/max_terminated_length": 2143.0, "completions/mean_length": 320.19140625, "completions/mean_terminated_length": 320.19140625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.042666666666666665, "grad_norm": 0.11550385504961014, "learning_rate": 4.444444444444444e-06, "loss": 0.0535, "num_tokens": 8907378.0, "reward": 0.811775803565979, "reward_std": 0.37996870279312134, "rewards/accuracy_reward_step": 0.203125, "rewards/final_brier_reward_step": 0.5127280950546265, "rewards/format_reward_step": 0.9609375, "step": 40 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.036460420602349e-08, "aux_brier/mean_group_std": 0.17564145897266573, "aux_brier/mean_r": 0.775780914182563, "aux_brier/n_active_tok": 189.875, "aux_brier/n_groups": 11.1875, "aux_brier/n_step_records": 47.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4394172763445001, "calib/avg_num_step_conf": 6.03125, "calib/ece": 0.32107509881422935, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.1857707509881423, "calib/gap": -0.055001155179052574, "calib/mean_conf": 0.6242213438735178, "calib/mu_c": 0.5922641509433964, "calib/mu_w": 0.6472653061224489, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2631620553359684, "calib/std_conf": 0.27202908764445805, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 895.0, "completions/max_terminated_length": 895.0, "completions/mean_length": 283.78125, "completions/mean_terminated_length": 284.8941345214844, "completions/min_length": 0.0, "completions/min_terminated_length": 65.0, "epoch": 0.04373333333333333, "grad_norm": 0.238211989402771, "learning_rate": 4.416666666666667e-06, "loss": 0.0189, "num_tokens": 9087274.0, "reward": 1.0600254535675049, "reward_std": 0.4129643440246582, "rewards/accuracy_reward_step": 0.421875, "rewards/final_brier_reward_step": 0.5994765758514404, "rewards/format_reward_step": 0.9765625, "step": 41 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.3230203970565624e-08, "aux_brier/mean_group_std": 0.14368066246345312, "aux_brier/mean_r": 0.850863281087842, "aux_brier/n_active_tok": 176.75, "aux_brier/n_groups": 9.1875, "aux_brier/n_step_records": 44.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5253285870755751, "calib/avg_num_step_conf": 5.58984375, "calib/ece": 0.27551181102362204, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.10236220472440945, "calib/gap": 0.029460569550931015, "calib/mean_conf": 0.5637007874015747, "calib/mu_c": 0.5829545454545454, "calib/mu_w": 0.5534939759036144, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.24637795275590552, "calib/std_conf": 0.26609986625001836, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 742.0, "completions/max_terminated_length": 742.0, "completions/mean_length": 262.30859375, "completions/mean_terminated_length": 263.3372802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 80.0, "epoch": 0.0448, "grad_norm": 0.45979180932044983, "learning_rate": 4.388888888888889e-06, "loss": 0.0198, "num_tokens": 9258793.0, "reward": 1.0074474811553955, "reward_std": 0.35529351234436035, "rewards/accuracy_reward_step": 0.34765625, "rewards/final_brier_reward_step": 0.6626027822494507, "rewards/format_reward_step": 0.98828125, "step": 42 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.329702780307535e-08, "aux_brier/mean_group_std": 0.14957277166942062, "aux_brier/mean_r": 0.8259949757507827, "aux_brier/n_active_tok": 210.625, "aux_brier/n_groups": 14.1875, "aux_brier/n_step_records": 52.65625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5152524544179524, "calib/avg_num_step_conf": 6.6171875, "calib/ece": 0.347479674796748, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.11788617886178862, "calib/gap": 0.01926718092566615, "calib/mean_conf": 0.5630081300813008, "calib/mu_c": 0.5774193548387097, "calib/mu_w": 0.5581521739130435, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32922764227642276, "calib/std_conf": 0.270925648935229, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2167.0, "completions/max_terminated_length": 2167.0, "completions/mean_length": 322.0625, "completions/mean_terminated_length": 322.0625, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "epoch": 0.04586666666666667, "grad_norm": 0.09345488250255585, "learning_rate": 4.361111111111112e-06, "loss": 0.0706, "num_tokens": 9446465.0, "reward": 0.8823894262313843, "reward_std": 0.2873356342315674, "rewards/accuracy_reward_step": 0.24609375, "rewards/final_brier_reward_step": 0.6233078241348267, "rewards/format_reward_step": 0.9609375, "step": 43 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.4479341359517903e-08, "aux_brier/mean_group_std": 0.12620192815382447, "aux_brier/mean_r": 0.8583310163183968, "aux_brier/n_active_tok": 207.625, "aux_brier/n_groups": 12.625, "aux_brier/n_step_records": 51.90625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5389643463497452, "calib/avg_num_step_conf": 6.578125, "calib/ece": 0.2923015873015873, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.10317460317460317, "calib/gap": 0.03947368421052644, "calib/mean_conf": 0.5152380952380953, "calib/mu_c": 0.545, "calib/mu_w": 0.5055263157894736, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28075396825396826, "calib/std_conf": 0.28069277755964733, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2139.0, "completions/max_terminated_length": 2139.0, "completions/mean_length": 321.21484375, "completions/mean_terminated_length": 322.4745178222656, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.046933333333333334, "grad_norm": 0.22092793881893158, "learning_rate": 4.333333333333334e-06, "loss": 0.056, "num_tokens": 9635016.0, "reward": 0.8987468481063843, "reward_std": 0.2588950991630554, "rewards/accuracy_reward_step": 0.2421875, "rewards/final_brier_reward_step": 0.6653000116348267, "rewards/format_reward_step": 0.98046875, "step": 44 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.225510651436394e-08, "aux_brier/mean_group_std": 0.1263788228170714, "aux_brier/mean_r": 0.8728943111172088, "aux_brier/n_active_tok": 179.75, "aux_brier/n_groups": 10.1875, "aux_brier/n_step_records": 44.9375, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5752585944630362, "calib/avg_num_step_conf": 5.6484375, "calib/ece": 0.17851807228915667, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.06827309236947791, "calib/gap": 0.06491375114085796, "calib/mean_conf": 0.41818875502008035, "calib/mu_c": 0.46328947368421053, "calib/mu_w": 0.3983757225433526, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.984375, "calib/pce": 0.14574297188755023, "calib/std_conf": 0.2666398389756717, "calib/step_conf_rate": 0.984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1953.0, "completions/max_terminated_length": 1953.0, "completions/mean_length": 303.140625, "completions/mean_terminated_length": 305.5275573730469, "completions/min_length": 0.0, "completions/min_terminated_length": 93.0, "epoch": 0.048, "grad_norm": 0.21973006427288055, "learning_rate": 4.305555555555556e-06, "loss": 0.0417, "num_tokens": 9817668.0, "reward": 0.9508191347122192, "reward_std": 0.36955684423446655, "rewards/accuracy_reward_step": 0.296875, "rewards/final_brier_reward_step": 0.7017141580581665, "rewards/format_reward_step": 0.95703125, "step": 45 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.3159327044709457e-08, "aux_brier/mean_group_std": 0.11285619288794033, "aux_brier/mean_r": 0.8738174159879206, "aux_brier/n_active_tok": 211.25, "aux_brier/n_groups": 14.0625, "aux_brier/n_step_records": 52.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5580414312617703, "calib/avg_num_step_conf": 6.6796875, "calib/ece": 0.1801984126984127, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.047619047619047616, "calib/gap": 0.04394802259887015, "calib/mean_conf": 0.4135317460317461, "calib/mu_c": 0.4444000000000001, "calib/mu_w": 0.4004519774011299, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14805555555555555, "calib/std_conf": 0.25272920404949456, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2560.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 321.11328125, "completions/mean_terminated_length": 321.11328125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.04906666666666667, "grad_norm": 0.3134056329727173, "learning_rate": 4.277777777777778e-06, "loss": 0.019, "num_tokens": 10004641.0, "reward": 0.9625797271728516, "reward_std": 0.31446170806884766, "rewards/accuracy_reward_step": 0.29296875, "rewards/final_brier_reward_step": 0.7175066471099854, "rewards/format_reward_step": 0.98046875, "step": 46 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.962741924161243e-08, "aux_brier/mean_group_std": 0.08731022754734642, "aux_brier/mean_r": 0.9216793358362947, "aux_brier/n_active_tok": 197.25, "aux_brier/n_groups": 11.5625, "aux_brier/n_step_records": 49.3125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.49510985116938344, "calib/avg_num_step_conf": 6.21875, "calib/ece": 0.1970395256916996, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": -0.012137278525868211, "calib/mean_conf": 0.3270711462450593, "calib/mu_c": 0.31891566265060234, "calib/mu_w": 0.33105294117647055, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.09802371541501975, "calib/std_conf": 0.23131016016407235, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2347.0, "completions/max_terminated_length": 2347.0, "completions/mean_length": 328.40234375, "completions/mean_terminated_length": 328.40234375, "completions/min_length": 64.0, "completions/min_terminated_length": 64.0, "epoch": 0.050133333333333335, "grad_norm": 0.2886498272418976, "learning_rate": 4.25e-06, "loss": 0.0581, "num_tokens": 10194688.0, "reward": 0.9934947490692139, "reward_std": 0.3531001806259155, "rewards/accuracy_reward_step": 0.32421875, "rewards/final_brier_reward_step": 0.7083537578582764, "rewards/format_reward_step": 0.984375, "step": 47 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.939587028296444e-08, "aux_brier/mean_group_std": 0.08426508432514862, "aux_brier/mean_r": 0.9278154236719309, "aux_brier/n_active_tok": 179.625, "aux_brier/n_groups": 10.1875, "aux_brier/n_step_records": 44.90625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.44981517197042753, "calib/avg_num_step_conf": 5.65234375, "calib/ece": 0.20378486055776893, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0199203187250996, "calib/gap": -0.044140147862423595, "calib/mean_conf": 0.29159362549800794, "calib/mu_c": 0.2594117647058824, "calib/mu_w": 0.303551912568306, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11223107569721116, "calib/std_conf": 0.2137034709713028, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1087.0, "completions/max_terminated_length": 1087.0, "completions/mean_length": 291.2578125, "completions/mean_terminated_length": 292.4000244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 60.0, "epoch": 0.0512, "grad_norm": 0.09665241092443466, "learning_rate": 4.222222222222223e-06, "loss": 0.0011, "num_tokens": 10372938.0, "reward": 0.9341095685958862, "reward_std": 0.29305028915405273, "rewards/accuracy_reward_step": 0.265625, "rewards/final_brier_reward_step": 0.7208132743835449, "rewards/format_reward_step": 0.9765625, "step": 48 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.8392815825940545e-08, "aux_brier/mean_group_std": 0.07645873166200375, "aux_brier/mean_r": 0.9382534881779221, "aux_brier/n_active_tok": 194.875, "aux_brier/n_groups": 10.625, "aux_brier/n_step_records": 48.71875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5316455696202532, "calib/avg_num_step_conf": 6.140625, "calib/ece": 0.22219291338582675, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007874015748031496, "calib/gap": -0.006096255274261608, "calib/mean_conf": 0.2705629921259842, "calib/mu_c": 0.26677083333333335, "calib/mu_w": 0.27286708860759495, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0574015748031496, "calib/std_conf": 0.208627163322724, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2445.0, "completions/max_terminated_length": 2445.0, "completions/mean_length": 319.4765625, "completions/mean_terminated_length": 319.4765625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.05226666666666667, "grad_norm": 0.2979802191257477, "learning_rate": 4.194444444444445e-06, "loss": 0.0278, "num_tokens": 10559260.0, "reward": 1.0435776710510254, "reward_std": 0.31182634830474854, "rewards/accuracy_reward_step": 0.375, "rewards/final_brier_reward_step": 0.6977483630180359, "rewards/format_reward_step": 0.98828125, "step": 49 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.08828793158267e-07, "aux_brier/mean_group_std": 0.06427214662614472, "aux_brier/mean_r": 0.9369866366067142, "aux_brier/n_active_tok": 206.0, "aux_brier/n_groups": 13.59375, "aux_brier/n_step_records": 51.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5766666666666667, "calib/avg_num_step_conf": 6.53515625, "calib/ece": 0.21261904761904762, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.03258039215686276, "calib/mean_conf": 0.259920634920635, "calib/mu_c": 0.2793137254901961, "calib/mu_w": 0.24673333333333336, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03388888888888889, "calib/std_conf": 0.2018967048157792, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2422.0, "completions/max_terminated_length": 2422.0, "completions/mean_length": 370.22265625, "completions/mean_terminated_length": 370.22265625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.05333333333333334, "grad_norm": 0.18312303721904755, "learning_rate": 4.166666666666667e-06, "loss": 0.0785, "num_tokens": 10759397.0, "reward": 1.0602662563323975, "reward_std": 0.32147830724716187, "rewards/accuracy_reward_step": 0.3984375, "rewards/final_brier_reward_step": 0.694190263748169, "rewards/format_reward_step": 0.9765625, "step": 50 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.72618643329492e-07, "aux_brier/mean_group_std": 0.07035482235431693, "aux_brier/mean_r": 0.9472252913957228, "aux_brier/n_active_tok": 206.125, "aux_brier/n_groups": 12.1875, "aux_brier/n_step_records": 51.53125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5368758434547909, "calib/avg_num_step_conf": 6.66015625, "calib/ece": 0.22499043824701193, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.01195219123505976, "calib/gap": 0.019218421052631607, "calib/mean_conf": 0.2114239043824701, "calib/mu_c": 0.2233684210526316, "calib/mu_w": 0.20415, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.028964143426294827, "calib/std_conf": 0.17743085805107256, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2576.0, "completions/max_terminated_length": 2576.0, "completions/mean_length": 340.421875, "completions/mean_terminated_length": 341.75689697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 81.0, "epoch": 0.0544, "grad_norm": 0.4266815185546875, "learning_rate": 4.138888888888889e-06, "loss": -0.0116, "num_tokens": 10955841.0, "reward": 1.0364437103271484, "reward_std": 0.2506124675273895, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.7004624009132385, "rewards/format_reward_step": 0.98046875, "step": 51 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.899122092952515e-07, "aux_brier/mean_group_std": 0.0794486182257967, "aux_brier/mean_r": 0.9407123680653106, "aux_brier/n_active_tok": 168.875, "aux_brier/n_groups": 9.71875, "aux_brier/n_step_records": 42.21875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5141870684243566, "calib/avg_num_step_conf": 5.34765625, "calib/ece": 0.36494071146245055, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.006596359070935365, "calib/mean_conf": 0.1799604743083004, "calib/mu_c": 0.18303703703703705, "calib/mu_w": 0.1764406779661017, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005652173913043483, "calib/std_conf": 0.15306228039208883, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1005.0, "completions/max_terminated_length": 1005.0, "completions/mean_length": 300.8984375, "completions/mean_terminated_length": 302.0784606933594, "completions/min_length": 0.0, "completions/min_terminated_length": 78.0, "epoch": 0.055466666666666664, "grad_norm": 0.07729270309209824, "learning_rate": 4.111111111111111e-06, "loss": -0.0015, "num_tokens": 11140823.0, "reward": 1.1750967502593994, "reward_std": 0.32514822483062744, "rewards/accuracy_reward_step": 0.53125, "rewards/final_brier_reward_step": 0.5988246202468872, "rewards/format_reward_step": 0.98828125, "step": 52 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.953884373450456e-07, "aux_brier/mean_group_std": 0.07555331336539535, "aux_brier/mean_r": 0.9359634580074322, "aux_brier/n_active_tok": 204.75, "aux_brier/n_groups": 11.6875, "aux_brier/n_step_records": 51.1875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5449483891147952, "calib/avg_num_step_conf": 6.4765625, "calib/ece": 0.2586929133858268, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": 0.03334250860181423, "calib/mean_conf": 0.22036220472440946, "calib/mu_c": 0.23860869565217394, "calib/mu_w": 0.2052661870503597, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.013149606299212599, "calib/std_conf": 0.17698292409696154, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1609.0, "completions/max_terminated_length": 1609.0, "completions/mean_length": 369.98828125, "completions/mean_terminated_length": 369.98828125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.05653333333333333, "grad_norm": 0.11660010367631912, "learning_rate": 4.083333333333334e-06, "loss": -0.0032, "num_tokens": 11341364.0, "reward": 1.1119604110717773, "reward_std": 0.32015568017959595, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6744041442871094, "rewards/format_reward_step": 0.98828125, "step": 53 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.662577220397978e-07, "aux_brier/mean_group_std": 0.07236882672802149, "aux_brier/mean_r": 0.945290777109079, "aux_brier/n_active_tok": 178.375, "aux_brier/n_groups": 10.0625, "aux_brier/n_step_records": 44.59375, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.48472530179847256, "calib/avg_num_step_conf": 5.64453125, "calib/ece": 0.3773098039215686, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.00784313725490196, "calib/gap": -0.021984294161123463, "calib/mean_conf": 0.17727843137254903, "calib/mu_c": 0.16667424242424242, "calib/mu_w": 0.18865853658536588, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.018470588235294114, "calib/std_conf": 0.1651200066424957, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1003.0, "completions/max_terminated_length": 1003.0, "completions/mean_length": 301.01171875, "completions/mean_terminated_length": 302.1921691894531, "completions/min_length": 0.0, "completions/min_terminated_length": 92.0, "epoch": 0.0576, "grad_norm": 0.24732919037342072, "learning_rate": 4.055555555555556e-06, "loss": -0.0221, "num_tokens": 11524655.0, "reward": 1.1660503149032593, "reward_std": 0.25720590353012085, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.5938885807991028, "rewards/format_reward_step": 0.99609375, "step": 54 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.1042314287945665e-06, "aux_brier/mean_group_std": 0.05991244713124709, "aux_brier/mean_r": 0.9538390167173045, "aux_brier/n_active_tok": 188.125, "aux_brier/n_groups": 10.71875, "aux_brier/n_step_records": 47.03125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.518529810298103, "calib/avg_num_step_conf": 5.890625, "calib/ece": 0.228996062992126, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.009935636856368557, "calib/mean_conf": 0.1658070866141732, "calib/mu_c": 0.17222222222222222, "calib/mu_w": 0.16228658536585366, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.02023622047244095, "calib/std_conf": 0.13575915953849596, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2063.0, "completions/max_terminated_length": 2063.0, "completions/mean_length": 329.7265625, "completions/mean_terminated_length": 329.7265625, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 0.058666666666666666, "grad_norm": 0.08426016569137573, "learning_rate": 4.027777777777779e-06, "loss": 0.0102, "num_tokens": 11716889.0, "reward": 1.0277278423309326, "reward_std": 0.26626989245414734, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.7124737501144409, "rewards/format_reward_step": 0.98828125, "step": 55 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.280250077067562e-07, "aux_brier/mean_group_std": 0.06364407843383066, "aux_brier/mean_r": 0.9420665660996802, "aux_brier/n_active_tok": 207.375, "aux_brier/n_groups": 12.75, "aux_brier/n_step_records": 51.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5009453781512605, "calib/avg_num_step_conf": 6.5703125, "calib/ece": 0.23035809448818895, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.011811023622047244, "calib/gap": -0.022974341736694687, "calib/mean_conf": 0.1670434803149606, "calib/mu_c": 0.15166695238095237, "calib/mu_w": 0.17464129411764706, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.03334645669291338, "calib/std_conf": 0.1553176301624802, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2393.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 362.73046875, "completions/mean_terminated_length": 364.1529541015625, "completions/min_length": 0.0, "completions/min_terminated_length": 89.0, "epoch": 0.05973333333333333, "grad_norm": 0.12983429431915283, "learning_rate": 4.000000000000001e-06, "loss": 0.0236, "num_tokens": 11916588.0, "reward": 1.0022120475769043, "reward_std": 0.2770439386367798, "rewards/accuracy_reward_step": 0.328125, "rewards/final_brier_reward_step": 0.7119733095169067, "rewards/format_reward_step": 0.9921875, "step": 56 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.376654273221316e-07, "aux_brier/mean_group_std": 0.04942550541668625, "aux_brier/mean_r": 0.9636226090382859, "aux_brier/n_active_tok": 195.125, "aux_brier/n_groups": 10.84375, "aux_brier/n_step_records": 48.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47398989898989896, "calib/avg_num_step_conf": 6.24609375, "calib/ece": 0.34597896825396823, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02168409090909093, "calib/mean_conf": 0.152275, "calib/mu_c": 0.14091666666666666, "calib/mu_w": 0.1626007575757576, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.011031746031746033, "calib/std_conf": 0.11978836219044754, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2107.0, "completions/max_terminated_length": 2107.0, "completions/mean_length": 357.76953125, "completions/mean_terminated_length": 359.1725769042969, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.0608, "grad_norm": 0.5027966499328613, "learning_rate": 3.972222222222223e-06, "loss": -0.0133, "num_tokens": 12114969.0, "reward": 1.117539882659912, "reward_std": 0.27990177273750305, "rewards/accuracy_reward_step": 0.47265625, "rewards/final_brier_reward_step": 0.6107839345932007, "rewards/format_reward_step": 0.984375, "step": 57 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.1905553668498676e-07, "aux_brier/mean_group_std": 0.0489274559063608, "aux_brier/mean_r": 0.9568433270706841, "aux_brier/n_active_tok": 235.0, "aux_brier/n_groups": 16.21875, "aux_brier/n_step_records": 58.75, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.4930229345122962, "calib/avg_num_step_conf": 7.41015625, "calib/ece": 0.24109834677419353, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.004032258064516129, "calib/gap": 0.00532516855484938, "calib/mean_conf": 0.17632100806451612, "calib/mu_c": 0.17962776595744676, "calib/mu_w": 0.17430259740259738, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019193548387096776, "calib/std_conf": 0.14273137815163298, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2774.0, "completions/max_terminated_length": 2774.0, "completions/mean_length": 447.6953125, "completions/mean_terminated_length": 449.4510192871094, "completions/min_length": 0.0, "completions/min_terminated_length": 138.0, "epoch": 0.06186666666666667, "grad_norm": 0.036757275462150574, "learning_rate": 3.944444444444445e-06, "loss": 0.0545, "num_tokens": 12335899.0, "reward": 1.0200591087341309, "reward_std": 0.337308406829834, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6817988753318787, "rewards/format_reward_step": 0.96484375, "step": 58 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.014187950795449e-07, "aux_brier/mean_group_std": 0.05336881759843583, "aux_brier/mean_r": 0.9565652547414072, "aux_brier/n_active_tok": 210.5, "aux_brier/n_groups": 13.15625, "aux_brier/n_step_records": 52.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5045234454638124, "calib/avg_num_step_conf": 6.58984375, "calib/ece": 0.27920948616600794, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0021645642201834348, "calib/mean_conf": 0.16624505928853756, "calib/mu_c": 0.16747706422018346, "calib/mu_w": 0.16531250000000003, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007312252964426878, "calib/std_conf": 0.12189678142362566, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2247.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 386.48828125, "completions/mean_terminated_length": 386.48828125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.06293333333333333, "grad_norm": 0.09482187032699585, "learning_rate": 3.916666666666667e-06, "loss": 0.0794, "num_tokens": 12541088.0, "reward": 1.0827938318252563, "reward_std": 0.28609517216682434, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6593005657196045, "rewards/format_reward_step": 0.984375, "step": 59 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.191440295541753e-07, "aux_brier/mean_group_std": 0.04327769012012292, "aux_brier/mean_r": 0.9651614148333408, "aux_brier/n_active_tok": 193.875, "aux_brier/n_groups": 10.90625, "aux_brier/n_step_records": 48.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5230470490863945, "calib/avg_num_step_conf": 6.09765625, "calib/ece": 0.31693452380952375, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.007616190233653786, "calib/mean_conf": 0.14084325396825398, "calib/mu_c": 0.14504424778761063, "calib/mu_w": 0.13742805755395684, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004682539682539683, "calib/std_conf": 0.10867851707462094, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1680.0, "completions/max_terminated_length": 1680.0, "completions/mean_length": 362.8046875, "completions/mean_terminated_length": 364.22747802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 131.0, "epoch": 0.064, "grad_norm": 0.5038781762123108, "learning_rate": 3.88888888888889e-06, "loss": -0.0006, "num_tokens": 12742822.0, "reward": 1.0935592651367188, "reward_std": 0.3151361644268036, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6398622989654541, "rewards/format_reward_step": 0.984375, "step": 60 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.7857904763829424e-06, "aux_brier/mean_group_std": 0.04825335258349303, "aux_brier/mean_r": 0.9574052712981143, "aux_brier/n_active_tok": 185.625, "aux_brier/n_groups": 10.96875, "aux_brier/n_step_records": 46.40625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5453249007936508, "calib/avg_num_step_conf": 5.83203125, "calib/ece": 0.35142519685039364, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.018094866071428545, "calib/mean_conf": 0.1449527559055118, "calib/mu_c": 0.15407142857142858, "calib/mu_w": 0.13597656250000004, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00015748031496063005, "calib/std_conf": 0.11532670868666751, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2264.0, "completions/max_terminated_length": 2264.0, "completions/mean_length": 330.015625, "completions/mean_terminated_length": 330.015625, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.06506666666666666, "grad_norm": 0.14747215807437897, "learning_rate": 3.861111111111112e-06, "loss": 0.0755, "num_tokens": 12931370.0, "reward": 1.142686367034912, "reward_std": 0.27778032422065735, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.617620587348938, "rewards/format_reward_step": 0.9921875, "step": 61 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.01399150374138e-08, "aux_brier/mean_group_std": 0.05118887125523231, "aux_brier/mean_r": 0.9571227176503799, "aux_brier/n_active_tok": 212.625, "aux_brier/n_groups": 14.5, "aux_brier/n_step_records": 53.15625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5131968758416376, "calib/avg_num_step_conf": 6.7578125, "calib/ece": 0.24676587301587302, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0002524912469701601, "calib/mean_conf": 0.1464880952380952, "calib/mu_c": 0.14632978723404252, "calib/mu_w": 0.14658227848101268, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.010119047619047618, "calib/std_conf": 0.11910074707370537, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2481.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 398.13671875, "completions/mean_terminated_length": 399.69805908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 90.0, "epoch": 0.06613333333333334, "grad_norm": 0.05607409402728081, "learning_rate": 3.833333333333334e-06, "loss": 0.0342, "num_tokens": 13140373.0, "reward": 1.028857707977295, "reward_std": 0.275343656539917, "rewards/accuracy_reward_step": 0.3671875, "rewards/final_brier_reward_step": 0.6857432723045349, "rewards/format_reward_step": 0.98046875, "step": 62 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.936687851755714e-06, "aux_brier/mean_group_std": 0.05187988928767217, "aux_brier/mean_r": 0.9580297117337391, "aux_brier/n_active_tok": 208.125, "aux_brier/n_groups": 12.875, "aux_brier/n_step_records": 52.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.44259305835010065, "calib/avg_num_step_conf": 6.59375, "calib/ece": 0.29705511811023616, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.003937007874015748, "calib/gap": -0.019607394366197184, "calib/mean_conf": 0.15908661417322836, "calib/mu_c": 0.14812500000000003, "calib/mu_w": 0.16773239436619722, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.007598425196850392, "calib/std_conf": 0.12720808940392123, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2419.0, "completions/max_terminated_length": 2419.0, "completions/mean_length": 395.234375, "completions/mean_terminated_length": 395.234375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.0672, "grad_norm": 0.07673514634370804, "learning_rate": 3.8055555555555556e-06, "loss": 0.0305, "num_tokens": 13350193.0, "reward": 1.0943763256072998, "reward_std": 0.30822843313217163, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6431306004524231, "rewards/format_reward_step": 0.9921875, "step": 63 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.176654963052641e-07, "aux_brier/mean_group_std": 0.043067219126586974, "aux_brier/mean_r": 0.961962838982981, "aux_brier/n_active_tok": 215.25, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 53.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4872828396450443, "calib/avg_num_step_conf": 6.81640625, "calib/ece": 0.36685770750988134, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.009485751781027424, "calib/mean_conf": 0.13907114624505929, "calib/mu_c": 0.14379527559055125, "calib/mu_w": 0.13430952380952382, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.001976284584980237, "calib/std_conf": 0.11225086105583791, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2997.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 396.078125, "completions/mean_terminated_length": 397.63140869140625, "completions/min_length": 0.0, "completions/min_terminated_length": 99.0, "epoch": 0.06826666666666667, "grad_norm": 0.04356149584054947, "learning_rate": 3.777777777777778e-06, "loss": 0.0781, "num_tokens": 13555365.0, "reward": 1.1410574913024902, "reward_std": 0.3345259726047516, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6032926440238953, "rewards/format_reward_step": 0.98828125, "step": 64 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.283241036822204e-07, "aux_brier/mean_group_std": 0.04073559344267951, "aux_brier/mean_r": 0.9675460601660378, "aux_brier/n_active_tok": 189.5, "aux_brier/n_groups": 11.0, "aux_brier/n_step_records": 47.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5487706193588548, "calib/avg_num_step_conf": 6.15625, "calib/ece": 0.3487322834645669, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.01530108932461871, "calib/mean_conf": 0.1292204724409449, "calib/mu_c": 0.13735294117647057, "calib/mu_w": 0.12205185185185186, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.004724409448818898, "calib/std_conf": 0.10920650909228467, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 328.2421875, "completions/mean_terminated_length": 329.5294189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 105.0, "epoch": 0.06933333333333333, "grad_norm": 0.2852790951728821, "learning_rate": 3.7500000000000005e-06, "loss": -0.0273, "num_tokens": 13744419.0, "reward": 1.1175971031188965, "reward_std": 0.28578680753707886, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.626638650894165, "rewards/format_reward_step": 0.9921875, "step": 65 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.791922440708138e-07, "aux_brier/mean_group_std": 0.04096224626962034, "aux_brier/mean_r": 0.9695239851345836, "aux_brier/n_active_tok": 222.375, "aux_brier/n_groups": 14.75, "aux_brier/n_step_records": 55.59375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4939931350114416, "calib/avg_num_step_conf": 7.10546875, "calib/ece": 0.3323214285714286, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007744088482074779, "calib/mean_conf": 0.13283730158730161, "calib/mu_c": 0.12859649122807018, "calib/mu_w": 0.13634057971014496, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0063888888888888875, "calib/std_conf": 0.11622700027411259, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2386.0, "completions/max_terminated_length": 2386.0, "completions/mean_length": 413.87109375, "completions/mean_terminated_length": 415.494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 111.0, "epoch": 0.0704, "grad_norm": 0.03689504787325859, "learning_rate": 3.7222222222222225e-06, "loss": 0.0632, "num_tokens": 13956722.0, "reward": 1.0903115272521973, "reward_std": 0.27287259697914124, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6190588474273682, "rewards/format_reward_step": 0.98046875, "step": 66 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.273451621449655e-07, "aux_brier/mean_group_std": 0.05023670370776576, "aux_brier/mean_r": 0.9571496179015527, "aux_brier/n_active_tok": 205.0, "aux_brier/n_groups": 12.8125, "aux_brier/n_step_records": 51.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5066753601589666, "calib/avg_num_step_conf": 6.640625, "calib/ece": 0.4093700787401574, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.007669523099850972, "calib/mean_conf": 0.12425196850393702, "calib/mu_c": 0.12056818181818182, "calib/mu_w": 0.1282377049180328, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.006968503937007873, "calib/std_conf": 0.10168768524639875, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2397.0, "completions/max_terminated_length": 2397.0, "completions/mean_length": 392.375, "completions/mean_terminated_length": 393.91375732421875, "completions/min_length": 0.0, "completions/min_terminated_length": 118.0, "epoch": 0.07146666666666666, "grad_norm": 0.19599691033363342, "learning_rate": 3.694444444444445e-06, "loss": -0.0191, "num_tokens": 14162178.0, "reward": 1.1526442766189575, "reward_std": 0.26171380281448364, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.5715146660804749, "rewards/format_reward_step": 0.98828125, "step": 67 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.572307031354029e-07, "aux_brier/mean_group_std": 0.03814839863849587, "aux_brier/mean_r": 0.9717990202583285, "aux_brier/n_active_tok": 199.25, "aux_brier/n_groups": 11.8125, "aux_brier/n_step_records": 49.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.506359649122807, "calib/avg_num_step_conf": 6.2734375, "calib/ece": 0.33337244094488183, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.000802481203007549, "calib/mean_conf": 0.1270212598425197, "calib/mu_c": 0.12657894736842107, "calib/mu_w": 0.12738142857142862, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.005787401574803149, "calib/std_conf": 0.08653963283927257, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2688.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 380.609375, "completions/mean_terminated_length": 380.609375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.07253333333333334, "grad_norm": 0.05051703378558159, "learning_rate": 3.6666666666666666e-06, "loss": 0.0599, "num_tokens": 14363702.0, "reward": 1.1004488468170166, "reward_std": 0.2596089243888855, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6361704468727112, "rewards/format_reward_step": 0.9921875, "step": 68 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.0029460362790417e-08, "aux_brier/mean_group_std": 0.050548947233676395, "aux_brier/mean_r": 0.9439660831761795, "aux_brier/n_active_tok": 209.25, "aux_brier/n_groups": 13.5, "aux_brier/n_step_records": 52.3125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5109879032258065, "calib/avg_num_step_conf": 6.54296875, "calib/ece": 0.26138339920948606, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.0048487903225806345, "calib/mean_conf": 0.12209486166007906, "calib/mu_c": 0.12516129032258064, "calib/mu_w": 0.1203125, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.007944664031620552, "calib/std_conf": 0.11601399880299575, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2431.0, "completions/max_terminated_length": 2431.0, "completions/mean_length": 432.25390625, "completions/mean_terminated_length": 432.25390625, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.0736, "grad_norm": 0.007314007729291916, "learning_rate": 3.638888888888889e-06, "loss": 0.0849, "num_tokens": 14578855.0, "reward": 1.02939772605896, "reward_std": 0.26667526364326477, "rewards/accuracy_reward_step": 0.36328125, "rewards/final_brier_reward_step": 0.6879035234451294, "rewards/format_reward_step": 0.98828125, "step": 69 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.035032152820527e-07, "aux_brier/mean_group_std": 0.04170518192401591, "aux_brier/mean_r": 0.9653501564889758, "aux_brier/n_active_tok": 213.25, "aux_brier/n_groups": 13.875, "aux_brier/n_step_records": 53.3125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.43133814929480563, "calib/avg_num_step_conf": 6.80078125, "calib/ece": 0.29827477016129034, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.027646224905400765, "calib/mean_conf": 0.11261232661290324, "calib/mu_c": 0.09555638947368421, "calib/mu_w": 0.12320261437908497, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.013911290322580643, "calib/std_conf": 0.09021506938097142, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2762.0, "completions/max_terminated_length": 2762.0, "completions/mean_length": 452.859375, "completions/mean_terminated_length": 454.63531494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 82.0, "epoch": 0.07466666666666667, "grad_norm": 0.1959269791841507, "learning_rate": 3.6111111111111115e-06, "loss": 0.1052, "num_tokens": 14801779.0, "reward": 1.0146573781967163, "reward_std": 0.21777372062206268, "rewards/accuracy_reward_step": 0.37109375, "rewards/final_brier_reward_step": 0.6445671319961548, "rewards/format_reward_step": 0.96484375, "step": 70 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.300020816610072e-06, "aux_brier/mean_group_std": 0.0498628349326778, "aux_brier/mean_r": 0.959245374749235, "aux_brier/n_active_tok": 221.0, "aux_brier/n_groups": 13.8125, "aux_brier/n_step_records": 55.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5055690692612982, "calib/avg_num_step_conf": 7.09375, "calib/ece": 0.31400000000000006, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.008718922033030332, "calib/mean_conf": 0.12244268774703558, "calib/mu_c": 0.11741121495327105, "calib/mu_w": 0.12613013698630138, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.00675889328063241, "calib/std_conf": 0.09537495007944927, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2343.0, "completions/max_terminated_length": 2343.0, "completions/mean_length": 429.6484375, "completions/mean_terminated_length": 431.3333740234375, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.07573333333333333, "grad_norm": 0.05317368730902672, "learning_rate": 3.5833333333333335e-06, "loss": -0.0008, "num_tokens": 15016177.0, "reward": 1.071134328842163, "reward_std": 0.30423712730407715, "rewards/accuracy_reward_step": 0.41796875, "rewards/final_brier_reward_step": 0.6439124345779419, "rewards/format_reward_step": 0.984375, "step": 71 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.04259339126645e-07, "aux_brier/mean_group_std": 0.04461903981817749, "aux_brier/mean_r": 0.9693644108698314, "aux_brier/n_active_tok": 226.125, "aux_brier/n_groups": 13.15625, "aux_brier/n_step_records": 56.53125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4859782889635565, "calib/avg_num_step_conf": 7.30078125, "calib/ece": 0.32679365079365075, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.012595502713879533, "calib/mean_conf": 0.10852380952380954, "calib/mu_c": 0.10122641509433963, "calib/mu_w": 0.11382191780821917, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0073412698412698395, "calib/std_conf": 0.07964167911435692, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2722.0, "completions/max_terminated_length": 2722.0, "completions/mean_length": 430.29296875, "completions/mean_terminated_length": 431.98040771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.0768, "grad_norm": 0.21096296608448029, "learning_rate": 3.555555555555556e-06, "loss": 0.0966, "num_tokens": 15230740.0, "reward": 1.0624244213104248, "reward_std": 0.24476802349090576, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.6325101852416992, "rewards/format_reward_step": 0.98046875, "step": 72 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.271813797423478e-07, "aux_brier/mean_group_std": 0.0449266087964411, "aux_brier/mean_r": 0.9717982172081372, "aux_brier/n_active_tok": 189.0, "aux_brier/n_groups": 10.75, "aux_brier/n_step_records": 47.25, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.48421515094458045, "calib/avg_num_step_conf": 5.99609375, "calib/ece": 0.46781102362204724, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.012478127157471944, "calib/mean_conf": 0.09840944881889764, "calib/mu_c": 0.09285815602836879, "calib/mu_w": 0.10533628318584073, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.005551181102362204, "calib/std_conf": 0.07283800971162871, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2835.0, "completions/max_terminated_length": 2835.0, "completions/mean_length": 405.515625, "completions/mean_terminated_length": 405.515625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.07786666666666667, "grad_norm": 0.009398190304636955, "learning_rate": 3.5277777777777784e-06, "loss": 0.0035, "num_tokens": 15441584.0, "reward": 1.1711393594741821, "reward_std": 0.2896115183830261, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.5204952955245972, "rewards/format_reward_step": 0.98046875, "step": 73 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.378562431459841e-06, "aux_brier/mean_group_std": 0.02836727570329467, "aux_brier/mean_r": 0.9733670951190804, "aux_brier/n_active_tok": 227.0, "aux_brier/n_groups": 15.6875, "aux_brier/n_step_records": 56.75, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.514908256880734, "calib/avg_num_step_conf": 7.25, "calib/ece": 0.3472409638554217, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.002470511140235901, "calib/mean_conf": 0.09051004016064257, "calib/mu_c": 0.09189908256880733, "calib/mu_w": 0.08942857142857143, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.0, "calib/std_conf": 0.05520186485193156, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2868.0, "completions/max_terminated_length": 2868.0, "completions/mean_length": 460.3671875, "completions/mean_terminated_length": 460.3671875, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.07893333333333333, "grad_norm": 0.056083135306835175, "learning_rate": 3.5e-06, "loss": 0.1295, "num_tokens": 15663366.0, "reward": 1.0636870861053467, "reward_std": 0.2997918426990509, "rewards/accuracy_reward_step": 0.42578125, "rewards/final_brier_reward_step": 0.6141231060028076, "rewards/format_reward_step": 0.96875, "step": 74 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.4267376573506851e-06, "aux_brier/mean_group_std": 0.06698737739295728, "aux_brier/mean_r": 0.9491829469277482, "aux_brier/n_active_tok": 209.875, "aux_brier/n_groups": 11.375, "aux_brier/n_step_records": 52.46875, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6204035576795434, "calib/avg_num_step_conf": 6.8203125, "calib/ece": 0.528235294117647, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02168956590999603, "calib/mean_conf": 0.10705882352941176, "calib/mu_c": 0.11496913580246913, "calib/mu_w": 0.0932795698924731, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.06616213352319744, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1526.0, "completions/max_terminated_length": 1526.0, "completions/mean_length": 393.40625, "completions/mean_terminated_length": 394.94903564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 140.0, "epoch": 0.08, "grad_norm": 0.058377038687467575, "learning_rate": 3.4722222222222224e-06, "loss": 0.008, "num_tokens": 15868830.0, "reward": 1.2541123628616333, "reward_std": 0.23057867586612701, "rewards/accuracy_reward_step": 0.6328125, "rewards/final_brier_reward_step": 0.4930119216442108, "rewards/format_reward_step": 0.99609375, "step": 75 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.131059898879961e-06, "aux_brier/mean_group_std": 0.05453229834066376, "aux_brier/mean_r": 0.9576792566254494, "aux_brier/n_active_tok": 223.0, "aux_brier/n_groups": 16.125, "aux_brier/n_step_records": 55.75, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.48114722507708124, "calib/avg_num_step_conf": 6.97265625, "calib/ece": 0.4586852589641434, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.00398406374501992, "calib/gap": -0.0032759506680370143, "calib/mean_conf": 0.10282868525896416, "calib/mu_c": 0.10136690647482015, "calib/mu_w": 0.10464285714285716, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0038645418326693224, "calib/std_conf": 0.0881254907808353, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2949.0, "completions/max_terminated_length": 2949.0, "completions/mean_length": 465.45703125, "completions/mean_terminated_length": 465.45703125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.08106666666666666, "grad_norm": 0.04971994459629059, "learning_rate": 3.444444444444445e-06, "loss": 0.0649, "num_tokens": 16091043.0, "reward": 1.162674903869629, "reward_std": 0.29774484038352966, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.5256999731063843, "rewards/format_reward_step": 0.9765625, "step": 76 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.984068840806598e-06, "aux_brier/mean_group_std": 0.05525603786164576, "aux_brier/mean_r": 0.9603699655860797, "aux_brier/n_active_tok": 195.125, "aux_brier/n_groups": 10.9375, "aux_brier/n_step_records": 48.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5084291187739464, "calib/avg_num_step_conf": 6.296875, "calib/ece": 0.44645019920318724, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.003996871008939959, "calib/mean_conf": 0.09538247011952192, "calib/mu_c": 0.09722962962962962, "calib/mu_w": 0.09323275862068967, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.00199203187250996, "calib/std_conf": 0.05840473166406211, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2911.0, "completions/max_terminated_length": 2911.0, "completions/mean_length": 378.9765625, "completions/mean_terminated_length": 381.96063232421875, "completions/min_length": 0.0, "completions/min_terminated_length": 119.0, "epoch": 0.08213333333333334, "grad_norm": 0.03173414245247841, "learning_rate": 3.416666666666667e-06, "loss": 0.0193, "num_tokens": 16292725.0, "reward": 1.1478147506713867, "reward_std": 0.3370266854763031, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5365713834762573, "rewards/format_reward_step": 0.97265625, "step": 77 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.471056108794173e-07, "aux_brier/mean_group_std": 0.06261163814577028, "aux_brier/mean_r": 0.9559191122970769, "aux_brier/n_active_tok": 216.125, "aux_brier/n_groups": 13.96875, "aux_brier/n_step_records": 54.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5613479262672811, "calib/avg_num_step_conf": 6.86328125, "calib/ece": 0.3994336, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.004440706605222736, "calib/mean_conf": 0.1083264, "calib/mu_c": 0.11056451612903227, "calib/mu_w": 0.10612380952380954, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.00588, "calib/std_conf": 0.08427593549193031, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2662.0, "completions/max_terminated_length": 2662.0, "completions/mean_length": 484.21484375, "completions/mean_terminated_length": 486.1137390136719, "completions/min_length": 0.0, "completions/min_terminated_length": 168.0, "epoch": 0.0832, "grad_norm": 0.026359865441918373, "learning_rate": 3.3888888888888893e-06, "loss": 0.0537, "num_tokens": 16524708.0, "reward": 1.1097559928894043, "reward_std": 0.28809860348701477, "rewards/accuracy_reward_step": 0.484375, "rewards/final_brier_reward_step": 0.5718364715576172, "rewards/format_reward_step": 0.96484375, "step": 78 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.4943519655605826e-07, "aux_brier/mean_group_std": 0.05061152028513624, "aux_brier/mean_r": 0.9596157720363296, "aux_brier/n_active_tok": 227.125, "aux_brier/n_groups": 14.375, "aux_brier/n_step_records": 56.78125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5238305941845764, "calib/avg_num_step_conf": 7.18359375, "calib/ece": 0.441494071146245, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.010855878634639687, "calib/mean_conf": 0.11186561264822134, "calib/mu_c": 0.11671428571428571, "calib/mu_w": 0.10585840707964603, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0, "calib/std_conf": 0.09014137843436602, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1355.0, "completions/max_terminated_length": 1355.0, "completions/mean_length": 460.18359375, "completions/mean_terminated_length": 461.9882507324219, "completions/min_length": 0.0, "completions/min_terminated_length": 112.0, "epoch": 0.08426666666666667, "grad_norm": 0.10766593366861343, "learning_rate": 3.3611111111111117e-06, "loss": 0.024, "num_tokens": 16748891.0, "reward": 1.178181767463684, "reward_std": 0.2775796353816986, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.5486649870872498, "rewards/format_reward_step": 0.98828125, "step": 79 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.176186380159933e-07, "aux_brier/mean_group_std": 0.05718390000940116, "aux_brier/mean_r": 0.9511804906700442, "aux_brier/n_active_tok": 234.75, "aux_brier/n_groups": 13.9375, "aux_brier/n_step_records": 58.6875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5629880937139932, "calib/avg_num_step_conf": 7.5703125, "calib/ece": 0.44301185770750984, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.02323620535142744, "calib/mean_conf": 0.13406324110671938, "calib/mu_c": 0.1438904109589041, "calib/mu_w": 0.12065420560747665, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.0, "calib/std_conf": 0.09186467509443817, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2329.0, "completions/max_terminated_length": 2329.0, "completions/mean_length": 410.14453125, "completions/mean_terminated_length": 411.7529602050781, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.08533333333333333, "grad_norm": 0.08334469795227051, "learning_rate": 3.3333333333333333e-06, "loss": -0.0114, "num_tokens": 16956048.0, "reward": 1.2005274295806885, "reward_std": 0.29282620549201965, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.552109956741333, "rewards/format_reward_step": 0.984375, "step": 80 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.260690728008498e-07, "aux_brier/mean_group_std": 0.053407029620857686, "aux_brier/mean_r": 0.9566165149535798, "aux_brier/n_active_tok": 238.875, "aux_brier/n_groups": 15.03125, "aux_brier/n_step_records": 59.71875, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5560224089635853, "calib/avg_num_step_conf": 7.76171875, "calib/ece": 0.4004979591836735, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.0008958916900093095, "calib/mean_conf": 0.1310938775510204, "calib/mu_c": 0.1306587301587302, "calib/mu_w": 0.1315546218487395, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.008653061224489797, "calib/std_conf": 0.08097116747655067, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2931.0, "completions/max_terminated_length": 2931.0, "completions/mean_length": 455.3203125, "completions/mean_terminated_length": 462.5476379394531, "completions/min_length": 0.0, "completions/min_terminated_length": 88.0, "epoch": 0.0864, "grad_norm": 0.14797793328762054, "learning_rate": 3.3055555555555558e-06, "loss": 0.0465, "num_tokens": 17178858.0, "reward": 1.1143836975097656, "reward_std": 0.28239357471466064, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.5669094324111938, "rewards/format_reward_step": 0.953125, "step": 81 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.401173662766823e-07, "aux_brier/mean_group_std": 0.0419831660674612, "aux_brier/mean_r": 0.9644077503974919, "aux_brier/n_active_tok": 198.625, "aux_brier/n_groups": 11.3125, "aux_brier/n_step_records": 49.65625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5053940568475452, "calib/avg_num_step_conf": 6.2421875, "calib/ece": 0.404433734939759, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.005236821705426345, "calib/mean_conf": 0.12142971887550202, "calib/mu_c": 0.12395348837209302, "calib/mu_w": 0.11871666666666668, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.003895582329317269, "calib/std_conf": 0.07295184691420815, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2693.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 410.90234375, "completions/mean_terminated_length": 412.5137634277344, "completions/min_length": 0.0, "completions/min_terminated_length": 133.0, "epoch": 0.08746666666666666, "grad_norm": 0.022786911576986313, "learning_rate": 3.277777777777778e-06, "loss": 0.0576, "num_tokens": 17389601.0, "reward": 1.1237928867340088, "reward_std": 0.2836204171180725, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.5654839277267456, "rewards/format_reward_step": 0.95703125, "step": 82 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.690216071466912e-06, "aux_brier/mean_group_std": 0.056603451192411613, "aux_brier/mean_r": 0.9445548420950379, "aux_brier/n_active_tok": 248.375, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 62.09375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4390629946185502, "calib/avg_num_step_conf": 8.0, "calib/ece": 0.3851011904761904, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.02184586894586893, "calib/mean_conf": 0.16394642857142858, "calib/mu_c": 0.15380370370370372, "calib/mu_w": 0.17564957264957265, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.006666666666666666, "calib/std_conf": 0.09272470619009018, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2530.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 511.10546875, "completions/mean_terminated_length": 515.1299438476562, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 0.08853333333333334, "grad_norm": 0.06263367086648941, "learning_rate": 3.2500000000000002e-06, "loss": 0.0592, "num_tokens": 17627708.0, "reward": 1.1604933738708496, "reward_std": 0.218886137008667, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.5794734358787537, "rewards/format_reward_step": 0.9765625, "step": 83 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.366732909270702e-07, "aux_brier/mean_group_std": 0.06194382538811641, "aux_brier/mean_r": 0.9414539129239293, "aux_brier/n_active_tok": 228.25, "aux_brier/n_groups": 14.0, "aux_brier/n_step_records": 57.0625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5344127062299926, "calib/avg_num_step_conf": 7.13671875, "calib/ece": 0.3483137254901961, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.010968973159320355, "calib/mean_conf": 0.18321568627450982, "calib/mu_c": 0.18854961832061065, "calib/mu_w": 0.1775806451612903, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.008901960784313726, "calib/std_conf": 0.1281721137156322, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3005.0, "completions/max_terminated_length": 3005.0, "completions/mean_length": 431.24609375, "completions/mean_terminated_length": 431.24609375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.0896, "grad_norm": 0.06950856745243073, "learning_rate": 3.2222222222222227e-06, "loss": -0.0013, "num_tokens": 17844027.0, "reward": 1.1666513681411743, "reward_std": 0.23589617013931274, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6275429725646973, "rewards/format_reward_step": 0.99609375, "step": 84 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.996977371416136e-07, "aux_brier/mean_group_std": 0.057099130091725056, "aux_brier/mean_r": 0.9383193974535409, "aux_brier/n_active_tok": 234.5, "aux_brier/n_groups": 14.6875, "aux_brier/n_step_records": 58.625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4952307790794443, "calib/avg_num_step_conf": 7.328125, "calib/ece": 0.34376, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": -0.016674988797132084, "calib/mean_conf": 0.20096, "calib/mu_c": 0.19275590551181102, "calib/mu_w": 0.2094308943089431, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01836, "calib/std_conf": 0.12515222091517192, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2906.0, "completions/max_terminated_length": 2906.0, "completions/mean_length": 496.01171875, "completions/mean_terminated_length": 497.9568786621094, "completions/min_length": 0.0, "completions/min_terminated_length": 124.0, "epoch": 0.09066666666666667, "grad_norm": 0.4499802589416504, "learning_rate": 3.1944444444444443e-06, "loss": 0.0335, "num_tokens": 18078830.0, "reward": 1.1386210918426514, "reward_std": 0.23980866372585297, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6169843673706055, "rewards/format_reward_step": 0.9765625, "step": 85 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.5307811555297377e-06, "aux_brier/mean_group_std": 0.06543589677221735, "aux_brier/mean_r": 0.9326995076108595, "aux_brier/n_active_tok": 233.5, "aux_brier/n_groups": 15.21875, "aux_brier/n_step_records": 58.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.593401722391084, "calib/avg_num_step_conf": 7.40234375, "calib/ece": 0.25393280632411064, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": 0.03494680851063825, "calib/mean_conf": 0.21302371541501974, "calib/mu_c": 0.23249999999999998, "calib/mu_w": 0.19755319148936173, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.012134387351778657, "calib/std_conf": 0.14985911509911753, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2390.0, "completions/max_terminated_length": 2390.0, "completions/mean_length": 490.1640625, "completions/mean_terminated_length": 492.0863037109375, "completions/min_length": 0.0, "completions/min_terminated_length": 117.0, "epoch": 0.09173333333333333, "grad_norm": 0.33087992668151855, "learning_rate": 3.1666666666666667e-06, "loss": 0.0163, "num_tokens": 18309824.0, "reward": 1.0977025032043457, "reward_std": 0.2513139843940735, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6798721551895142, "rewards/format_reward_step": 0.98046875, "step": 86 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.671883360922834e-06, "aux_brier/mean_group_std": 0.06403182758078232, "aux_brier/mean_r": 0.9349012693869212, "aux_brier/n_active_tok": 189.875, "aux_brier/n_groups": 10.15625, "aux_brier/n_step_records": 47.46875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5668133166984044, "calib/avg_num_step_conf": 5.93359375, "calib/ece": 0.4348228346456692, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.0, "calib/gap": 0.02279499638846938, "calib/mean_conf": 0.1891929133858268, "calib/mu_c": 0.19789808917197452, "calib/mu_w": 0.17510309278350514, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.002952755905511811, "calib/std_conf": 0.11869134767442834, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 389.29296875, "completions/mean_terminated_length": 390.81964111328125, "completions/min_length": 0.0, "completions/min_terminated_length": 110.0, "epoch": 0.0928, "grad_norm": 0.055273041129112244, "learning_rate": 3.138888888888889e-06, "loss": 0.024, "num_tokens": 18514979.0, "reward": 1.2524123191833496, "reward_std": 0.23912067711353302, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.5721486806869507, "rewards/format_reward_step": 0.9921875, "step": 87 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.346658429213221e-07, "aux_brier/mean_group_std": 0.05874770184713301, "aux_brier/mean_r": 0.9336913212429232, "aux_brier/n_active_tok": 225.125, "aux_brier/n_groups": 13.3125, "aux_brier/n_step_records": 56.28125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6215100523098254, "calib/avg_num_step_conf": 7.0625, "calib/ece": 0.2942039682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.007936507936507936, "calib/gap": 0.04145853658536586, "calib/mean_conf": 0.2364309523809524, "calib/mu_c": 0.25666666666666665, "calib/mu_w": 0.2152081300813008, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.009365079365079364, "calib/std_conf": 0.1510091760926214, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2739.0, "completions/max_terminated_length": 2739.0, "completions/mean_length": 483.390625, "completions/mean_terminated_length": 483.390625, "completions/min_length": 195.0, "completions/min_terminated_length": 195.0, "epoch": 0.09386666666666667, "grad_norm": 0.3894507586956024, "learning_rate": 3.1111111111111116e-06, "loss": 0.082, "num_tokens": 18748575.0, "reward": 1.1596109867095947, "reward_std": 0.22322949767112732, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.6540689468383789, "rewards/format_reward_step": 0.9765625, "step": 88 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.5983053953116944e-06, "aux_brier/mean_group_std": 0.05685648627614638, "aux_brier/mean_r": 0.9327129965170959, "aux_brier/n_active_tok": 226.125, "aux_brier/n_groups": 12.96875, "aux_brier/n_step_records": 56.53125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5610586011342154, "calib/avg_num_step_conf": 7.1875, "calib/ece": 0.23624505928853756, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.003952569169960474, "calib/gap": 0.020043478260869607, "calib/mean_conf": 0.24628458498023717, "calib/mu_c": 0.2572173913043479, "calib/mu_w": 0.2371739130434783, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.013992094861660078, "calib/std_conf": 0.1400212788724654, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2247.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 493.38671875, "completions/mean_terminated_length": 497.2716369628906, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.09493333333333333, "grad_norm": 0.4990207254886627, "learning_rate": 3.0833333333333336e-06, "loss": 0.0208, "num_tokens": 18983770.0, "reward": 1.1160681247711182, "reward_std": 0.24893710017204285, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.6908347606658936, "rewards/format_reward_step": 0.98828125, "step": 89 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.048488792737006e-07, "aux_brier/mean_group_std": 0.08312437496410667, "aux_brier/mean_r": 0.9000635924668137, "aux_brier/n_active_tok": 231.0, "aux_brier/n_groups": 13.3125, "aux_brier/n_step_records": 57.75, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.45340114431023515, "calib/avg_num_step_conf": 7.23828125, "calib/ece": 0.3158498023715415, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.007905138339920948, "calib/gap": -0.027391608391608313, "calib/mean_conf": 0.2889723320158103, "calib/mu_c": 0.2770629370629371, "calib/mu_w": 0.3044545454545454, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.019802371541501967, "calib/std_conf": 0.1606869247077436, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2975.0, "completions/max_terminated_length": 2975.0, "completions/mean_length": 466.8515625, "completions/mean_terminated_length": 466.8515625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.096, "grad_norm": 0.2098333239555359, "learning_rate": 3.055555555555556e-06, "loss": 0.0215, "num_tokens": 19206604.0, "reward": 1.2144343852996826, "reward_std": 0.2846514880657196, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.631174623966217, "rewards/format_reward_step": 0.98828125, "step": 90 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.631682916759864e-07, "aux_brier/mean_group_std": 0.05885228361090886, "aux_brier/mean_r": 0.9083179479811493, "aux_brier/n_active_tok": 241.5, "aux_brier/n_groups": 14.15625, "aux_brier/n_step_records": 60.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5149176954732511, "calib/avg_num_step_conf": 7.6328125, "calib/ece": 0.29948412698412696, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": -0.004629629629629595, "calib/mean_conf": 0.3128174603174603, "calib/mu_c": 0.31083333333333335, "calib/mu_w": 0.31546296296296295, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02043650793650794, "calib/std_conf": 0.16148737730383342, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2512.0, "completions/max_terminated_length": 2512.0, "completions/mean_length": 488.08984375, "completions/mean_terminated_length": 488.08984375, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.09706666666666666, "grad_norm": 0.10228096693754196, "learning_rate": 3.0277777777777776e-06, "loss": 0.0142, "num_tokens": 19439267.0, "reward": 1.2170789241790771, "reward_std": 0.24731676280498505, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6495659947395325, "rewards/format_reward_step": 0.984375, "step": 91 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.660268711331514e-06, "aux_brier/mean_group_std": 0.041815796396915884, "aux_brier/mean_r": 0.9200492032263131, "aux_brier/n_active_tok": 218.0, "aux_brier/n_groups": 12.875, "aux_brier/n_step_records": 54.5, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.6347257653061225, "calib/avg_num_step_conf": 6.9375, "calib/ece": 0.2732539682539682, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.011904761904761904, "calib/gap": 0.06925000000000003, "calib/mean_conf": 0.29365079365079366, "calib/mu_c": 0.32442857142857146, "calib/mu_w": 0.2551785714285714, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.005674603174603174, "calib/std_conf": 0.17074813519910925, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2866.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 472.7734375, "completions/mean_terminated_length": 474.6274719238281, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.09813333333333334, "grad_norm": 0.3245234787464142, "learning_rate": 3e-06, "loss": -0.0329, "num_tokens": 19667017.0, "reward": 1.204309344291687, "reward_std": 0.27141866087913513, "rewards/accuracy_reward_step": 0.546875, "rewards/final_brier_reward_step": 0.6766124963760376, "rewards/format_reward_step": 0.9765625, "step": 92 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.402533863665184e-07, "aux_brier/mean_group_std": 0.0812354678954359, "aux_brier/mean_r": 0.8682917187290476, "aux_brier/n_active_tok": 267.0, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 66.75, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.36983497102544727, "calib/avg_num_step_conf": 8.609375, "calib/ece": 0.30675198412698407, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05555555555555555, "calib/gap": -0.11300396825396819, "calib/mean_conf": 0.43023214285714284, "calib/mu_c": 0.3737301587301588, "calib/mu_w": 0.486734126984127, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11849206349206348, "calib/std_conf": 0.22850982638072923, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2613.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 525.28125, "completions/mean_terminated_length": 527.3411865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 0.0992, "grad_norm": 0.1926182359457016, "learning_rate": 2.9722222222222225e-06, "loss": 0.0354, "num_tokens": 19907265.0, "reward": 1.138358235359192, "reward_std": 0.2905207872390747, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6237451434135437, "rewards/format_reward_step": 0.98046875, "step": 93 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.345957592664766e-07, "aux_brier/mean_group_std": 0.07341426312216502, "aux_brier/mean_r": 0.8903702374658317, "aux_brier/n_active_tok": 229.125, "aux_brier/n_groups": 12.625, "aux_brier/n_step_records": 57.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5557502189963708, "calib/avg_num_step_conf": 7.28515625, "calib/ece": 0.14703557312252963, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.015810276679841896, "calib/gap": 0.04438555875359779, "calib/mean_conf": 0.3852964426877471, "calib/mu_c": 0.40827868852459015, "calib/mu_w": 0.36389312977099236, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.02505928853754942, "calib/std_conf": 0.18191887570871856, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2590.0, "completions/max_terminated_length": 2590.0, "completions/mean_length": 445.7734375, "completions/mean_terminated_length": 449.2834777832031, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.10026666666666667, "grad_norm": 0.09607866406440735, "learning_rate": 2.944444444444445e-06, "loss": -0.0054, "num_tokens": 20130063.0, "reward": 1.1483310461044312, "reward_std": 0.21412217617034912, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.718323826789856, "rewards/format_reward_step": 0.984375, "step": 94 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.395376028279216e-06, "aux_brier/mean_group_std": 0.08585101812496332, "aux_brier/mean_r": 0.859334020540006, "aux_brier/n_active_tok": 256.5, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 64.125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5002564102564103, "calib/avg_num_step_conf": 8.171875, "calib/ece": 0.25031496062992126, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.05511811023622047, "calib/gap": 0.0030397435897436265, "calib/mean_conf": 0.4236220472440945, "calib/mu_c": 0.42486666666666667, "calib/mu_w": 0.42182692307692304, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.04169291338582677, "calib/std_conf": 0.22089891261085734, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1620.0, "completions/max_terminated_length": 1620.0, "completions/mean_length": 509.57421875, "completions/mean_terminated_length": 513.5866088867188, "completions/min_length": 0.0, "completions/min_terminated_length": 115.0, "epoch": 0.10133333333333333, "grad_norm": 0.08091621100902557, "learning_rate": 2.916666666666667e-06, "loss": -0.0238, "num_tokens": 20366642.0, "reward": 1.2514491081237793, "reward_std": 0.2802199125289917, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6776718497276306, "rewards/format_reward_step": 0.9921875, "step": 95 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.531609392761538e-07, "aux_brier/mean_group_std": 0.08987008361879134, "aux_brier/mean_r": 0.8687645458862787, "aux_brier/n_active_tok": 244.25, "aux_brier/n_groups": 12.53125, "aux_brier/n_step_records": 61.0625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5170045345425447, "calib/avg_num_step_conf": 7.65625, "calib/ece": 0.2505882352941176, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.01568627450980392, "calib/gap": 0.017892104561216482, "calib/mean_conf": 0.42698039215686273, "calib/mu_c": 0.43343558282208594, "calib/mu_w": 0.41554347826086946, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.01917647058823529, "calib/std_conf": 0.19564849334678205, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1141.0, "completions/max_terminated_length": 1141.0, "completions/mean_length": 478.625, "completions/mean_terminated_length": 478.625, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 0.1024, "grad_norm": 0.06811456382274628, "learning_rate": 2.888888888888889e-06, "loss": -0.0034, "num_tokens": 20594986.0, "reward": 1.3025823831558228, "reward_std": 0.2817477583885193, "rewards/accuracy_reward_step": 0.63671875, "rewards/final_brier_reward_step": 0.6868917942047119, "rewards/format_reward_step": 0.98828125, "step": 96 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.6849083896430805e-06, "aux_brier/mean_group_std": 0.0853580985421658, "aux_brier/mean_r": 0.8620290815106396, "aux_brier/n_active_tok": 253.375, "aux_brier/n_groups": 14.0, "aux_brier/n_step_records": 63.34375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.42947276361819087, "calib/avg_num_step_conf": 8.16796875, "calib/ece": 0.2601574803149606, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.05905511811023622, "calib/gap": -0.04818715642178917, "calib/mean_conf": 0.4506299212598425, "calib/mu_c": 0.4286231884057971, "calib/mu_w": 0.4768103448275863, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.08374015748031496, "calib/std_conf": 0.21283888413722996, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2624.0, "completions/max_terminated_length": 2624.0, "completions/mean_length": 484.8125, "completions/mean_terminated_length": 484.8125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.10346666666666667, "grad_norm": 0.036569032818078995, "learning_rate": 2.861111111111111e-06, "loss": 0.0255, "num_tokens": 20824170.0, "reward": 1.196521520614624, "reward_std": 0.31164079904556274, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.6610859632492065, "rewards/format_reward_step": 0.984375, "step": 97 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.166580990696886e-07, "aux_brier/mean_group_std": 0.1180901782807219, "aux_brier/mean_r": 0.8158591475667646, "aux_brier/n_active_tok": 273.0, "aux_brier/n_groups": 15.96875, "aux_brier/n_step_records": 68.25, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5150348730096065, "calib/avg_num_step_conf": 8.66015625, "calib/ece": 0.2241633466135458, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07569721115537849, "calib/gap": 0.017927687853665053, "calib/mean_conf": 0.5215737051792828, "calib/mu_c": 0.5288590604026846, "calib/mu_w": 0.5109313725490195, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.07605577689243029, "calib/std_conf": 0.22639741691167684, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2598.0, "completions/max_terminated_length": 2598.0, "completions/mean_length": 514.52734375, "completions/mean_terminated_length": 518.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 128.0, "epoch": 0.10453333333333334, "grad_norm": 0.0860593393445015, "learning_rate": 2.8333333333333335e-06, "loss": 0.0307, "num_tokens": 21062073.0, "reward": 1.243653655052185, "reward_std": 0.335063099861145, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6933647394180298, "rewards/format_reward_step": 0.9765625, "step": 98 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.718932159064053e-09, "aux_brier/mean_group_std": 0.09900352003789668, "aux_brier/mean_r": 0.8149418265467578, "aux_brier/n_active_tok": 279.125, "aux_brier/n_groups": 14.84375, "aux_brier/n_step_records": 69.78125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.46830357142857143, "calib/avg_num_step_conf": 8.7890625, "calib/ece": 0.2847366533864541, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.09163346613545817, "calib/gap": -0.01995292582417585, "calib/mean_conf": 0.5323948207171315, "calib/mu_c": 0.5196758241758241, "calib/mu_w": 0.53962875, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22729083665338642, "calib/std_conf": 0.24678504189971875, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1831.0, "completions/max_terminated_length": 1831.0, "completions/mean_length": 570.921875, "completions/mean_terminated_length": 575.4172973632812, "completions/min_length": 0.0, "completions/min_terminated_length": 146.0, "epoch": 0.1056, "grad_norm": 0.19342589378356934, "learning_rate": 2.805555555555556e-06, "loss": 0.0101, "num_tokens": 21314029.0, "reward": 1.004584789276123, "reward_std": 0.2824621796607971, "rewards/accuracy_reward_step": 0.35546875, "rewards/final_brier_reward_step": 0.6511518955230713, "rewards/format_reward_step": 0.97265625, "step": 99 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.571975909002269e-07, "aux_brier/mean_group_std": 0.10170921841231911, "aux_brier/mean_r": 0.8233811362389352, "aux_brier/n_active_tok": 266.375, "aux_brier/n_groups": 13.28125, "aux_brier/n_step_records": 66.59375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5978111319574735, "calib/avg_num_step_conf": 8.5546875, "calib/ece": 0.12350434782608696, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.05928853754940711, "calib/gap": 0.0627040775484678, "calib/mean_conf": 0.5267723320158103, "calib/mu_c": 0.5572569230769231, "calib/mu_w": 0.4945528455284553, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06822134387351778, "calib/std_conf": 0.22034138233223347, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1941.0, "completions/max_terminated_length": 1941.0, "completions/mean_length": 556.44921875, "completions/mean_terminated_length": 560.8306884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.10666666666666667, "grad_norm": 0.12148700654506683, "learning_rate": 2.7777777777777783e-06, "loss": 0.0292, "num_tokens": 21563888.0, "reward": 1.178086280822754, "reward_std": 0.2891013026237488, "rewards/accuracy_reward_step": 0.5078125, "rewards/final_brier_reward_step": 0.7201579809188843, "rewards/format_reward_step": 0.98046875, "step": 100 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.9718142124934843e-07, "aux_brier/mean_group_std": 0.10433184295646486, "aux_brier/mean_r": 0.8045303526163216, "aux_brier/n_active_tok": 298.75, "aux_brier/n_groups": 15.71875, "aux_brier/n_step_records": 74.6875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.6049597423510467, "calib/avg_num_step_conf": 9.80859375, "calib/ece": 0.16257200000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.14, "calib/gap": 0.08663574879227043, "calib/mean_conf": 0.570308, "calib/mu_c": 0.617091304347826, "calib/mu_w": 0.5304555555555556, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.13644, "calib/std_conf": 0.2481456329174463, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2633.0, "completions/max_terminated_length": 2633.0, "completions/mean_length": 628.94140625, "completions/mean_terminated_length": 631.4078979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 193.0, "epoch": 0.10773333333333333, "grad_norm": 0.19665689766407013, "learning_rate": 2.7500000000000004e-06, "loss": 0.0347, "num_tokens": 21831889.0, "reward": 1.1090095043182373, "reward_std": 0.32522842288017273, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.701663076877594, "rewards/format_reward_step": 0.96875, "step": 101 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.312684778271183e-07, "aux_brier/mean_group_std": 0.09773476679729748, "aux_brier/mean_r": 0.8320598076183132, "aux_brier/n_active_tok": 259.75, "aux_brier/n_groups": 13.25, "aux_brier/n_step_records": 64.9375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4508320230607966, "calib/avg_num_step_conf": 8.2265625, "calib/ece": 0.22388235294117642, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.07058823529411765, "calib/gap": -0.03742924528301894, "calib/mean_conf": 0.5354117647058824, "calib/mu_c": 0.521320754716981, "calib/mu_w": 0.55875, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.06788235294117642, "calib/std_conf": 0.20693971618831775, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1541.0, "completions/max_terminated_length": 1541.0, "completions/mean_length": 489.76953125, "completions/mean_terminated_length": 491.6902160644531, "completions/min_length": 0.0, "completions/min_terminated_length": 126.0, "epoch": 0.1088, "grad_norm": 1.647596836090088, "learning_rate": 2.7222222222222224e-06, "loss": 0.0106, "num_tokens": 22063966.0, "reward": 1.2793034315109253, "reward_std": 0.24243158102035522, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6797136664390564, "rewards/format_reward_step": 0.9765625, "step": 102 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.5484969873025278e-07, "aux_brier/mean_group_std": 0.10507102402294292, "aux_brier/mean_r": 0.7947326888876801, "aux_brier/n_active_tok": 278.5, "aux_brier/n_groups": 15.3125, "aux_brier/n_step_records": 69.625, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.45345744680851063, "calib/avg_num_step_conf": 9.40625, "calib/ece": 0.23202448979591836, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.10204081632653061, "calib/gap": -0.0491362520458265, "calib/mean_conf": 0.5712408163265307, "calib/mu_c": 0.5503829787234042, "calib/mu_w": 0.5995192307692307, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.11387755102040815, "calib/std_conf": 0.24214982115427608, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2801.0, "completions/max_terminated_length": 2801.0, "completions/mean_length": 648.47265625, "completions/mean_terminated_length": 653.5787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.10986666666666667, "grad_norm": 0.21203476190567017, "learning_rate": 2.6944444444444444e-06, "loss": 0.0441, "num_tokens": 22334527.0, "reward": 1.18772554397583, "reward_std": 0.30444949865341187, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6415274143218994, "rewards/format_reward_step": 0.953125, "step": 103 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.8911112323902763e-07, "aux_brier/mean_group_std": 0.11028402334238388, "aux_brier/mean_r": 0.7920487339286056, "aux_brier/n_active_tok": 295.5, "aux_brier/n_groups": 14.96875, "aux_brier/n_step_records": 73.875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.6061949746160272, "calib/avg_num_step_conf": 9.703125, "calib/ece": 0.16158799999999998, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.116, "calib/gap": 0.07995263800526964, "calib/mean_conf": 0.616884, "calib/mu_c": 0.6594188034188034, "calib/mu_w": 0.5794661654135338, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15523599999999996, "calib/std_conf": 0.20940274722171148, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2012.0, "completions/max_terminated_length": 2012.0, "completions/mean_length": 574.453125, "completions/mean_terminated_length": 581.2648315429688, "completions/min_length": 0.0, "completions/min_terminated_length": 226.0, "epoch": 0.11093333333333333, "grad_norm": 0.04694162309169769, "learning_rate": 2.666666666666667e-06, "loss": -0.0168, "num_tokens": 22588267.0, "reward": 1.1170382499694824, "reward_std": 0.34029483795166016, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.7025283575057983, "rewards/format_reward_step": 0.96875, "step": 104 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.495803599622537e-07, "aux_brier/mean_group_std": 0.10375383807398324, "aux_brier/mean_r": 0.7698952471212377, "aux_brier/n_active_tok": 300.125, "aux_brier/n_groups": 16.8125, "aux_brier/n_step_records": 75.03125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.45430000000000004, "calib/avg_num_step_conf": 9.7734375, "calib/ece": 0.25625306122448976, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.1673469387755102, "calib/gap": -0.03667066666666652, "calib/mean_conf": 0.6072571428571429, "calib/mu_c": 0.589296, "calib/mu_w": 0.6259666666666666, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1766530612244898, "calib/std_conf": 0.24209441824820904, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2846.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 622.203125, "completions/mean_terminated_length": 622.203125, "completions/min_length": 200.0, "completions/min_terminated_length": 200.0, "epoch": 0.112, "grad_norm": 0.9290190935134888, "learning_rate": 2.6388888888888893e-06, "loss": 0.0393, "num_tokens": 22853311.0, "reward": 1.1226959228515625, "reward_std": 0.3886973261833191, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6314086318016052, "rewards/format_reward_step": 0.953125, "step": 105 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.141134875932043e-07, "aux_brier/mean_group_std": 0.11222350522736028, "aux_brier/mean_r": 0.7831322035965229, "aux_brier/n_active_tok": 287.875, "aux_brier/n_groups": 13.4375, "aux_brier/n_step_records": 71.96875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5274049918474852, "calib/avg_num_step_conf": 9.328125, "calib/ece": 0.20736363636363642, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.08695652173913043, "calib/gap": 0.03241063589614945, "calib/mean_conf": 0.6360355731225296, "calib/mu_c": 0.6532016806722689, "calib/mu_w": 0.6207910447761195, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18652173913043485, "calib/std_conf": 0.21853195676115036, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2327.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 548.98046875, "completions/mean_terminated_length": 551.1333618164062, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.11306666666666666, "grad_norm": 0.5100629925727844, "learning_rate": 2.6111111111111113e-06, "loss": 0.0564, "num_tokens": 23098434.0, "reward": 1.1276081800460815, "reward_std": 0.3030830919742584, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.6823076009750366, "rewards/format_reward_step": 0.984375, "step": 106 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.481459785459329e-07, "aux_brier/mean_group_std": 0.11460418211394487, "aux_brier/mean_r": 0.7763244199315831, "aux_brier/n_active_tok": 300.875, "aux_brier/n_groups": 15.53125, "aux_brier/n_step_records": 75.21875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5704158790170133, "calib/avg_num_step_conf": 9.7890625, "calib/ece": 0.1571608695652174, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.13438735177865613, "calib/gap": 0.046667391304347716, "calib/mean_conf": 0.6392027667984189, "calib/mu_c": 0.6604152173913044, "calib/mu_w": 0.6137478260869567, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12545454545454543, "calib/std_conf": 0.2236818974731012, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2471.0, "completions/max_terminated_length": 2471.0, "completions/mean_length": 563.6171875, "completions/mean_terminated_length": 563.6171875, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "epoch": 0.11413333333333334, "grad_norm": 0.5186272263526917, "learning_rate": 2.5833333333333337e-06, "loss": 0.0295, "num_tokens": 23347336.0, "reward": 1.210200548171997, "reward_std": 0.32997098565101624, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.707989513874054, "rewards/format_reward_step": 0.98828125, "step": 107 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.08463159421391e-08, "aux_brier/mean_group_std": 0.128515429104101, "aux_brier/mean_r": 0.7434579599357282, "aux_brier/n_active_tok": 313.875, "aux_brier/n_groups": 14.8125, "aux_brier/n_step_records": 78.46875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5340425531914893, "calib/avg_num_step_conf": 10.25, "calib/ece": 0.20487831325301203, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.19678714859437751, "calib/gap": 0.008639299931366029, "calib/mean_conf": 0.6878927710843372, "calib/mu_c": 0.6911541935483873, "calib/mu_w": 0.6825148936170212, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13514056224899598, "calib/std_conf": 0.24153244676297436, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2523.0, "completions/max_terminated_length": 2523.0, "completions/mean_length": 610.79296875, "completions/mean_terminated_length": 613.1882934570312, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.1152, "grad_norm": 0.39098060131073, "learning_rate": 2.5555555555555557e-06, "loss": -0.0083, "num_tokens": 23606931.0, "reward": 1.2635798454284668, "reward_std": 0.39778855443000793, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6871317625045776, "rewards/format_reward_step": 0.97265625, "step": 108 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.051111352822769e-07, "aux_brier/mean_group_std": 0.1244642150956407, "aux_brier/mean_r": 0.7541600910765421, "aux_brier/n_active_tok": 332.125, "aux_brier/n_groups": 17.78125, "aux_brier/n_step_records": 83.03125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5911488117931123, "calib/avg_num_step_conf": 11.25, "calib/ece": 0.21940796812749003, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.16733067729083664, "calib/gap": 0.07328400050832384, "calib/mean_conf": 0.6489984063745019, "calib/mu_c": 0.6866622950819672, "calib/mu_w": 0.6133782945736433, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19117529880478085, "calib/std_conf": 0.24650619929786974, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2683.0, "completions/max_terminated_length": 2683.0, "completions/mean_length": 621.921875, "completions/mean_terminated_length": 626.8189086914062, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.11626666666666667, "grad_norm": 0.15936784446239471, "learning_rate": 2.5277777777777778e-06, "loss": 0.0116, "num_tokens": 23870743.0, "reward": 1.1357372999191284, "reward_std": 0.2178267389535904, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6835745573043823, "rewards/format_reward_step": 0.9765625, "step": 109 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.5052846808779634e-07, "aux_brier/mean_group_std": 0.10248965500909708, "aux_brier/mean_r": 0.7875876710416372, "aux_brier/n_active_tok": 290.625, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 72.65625, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4687301587301588, "calib/avg_num_step_conf": 9.5, "calib/ece": 0.2398446215139442, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.11952191235059761, "calib/gap": -0.019774412698412625, "calib/mean_conf": 0.6366494023904384, "calib/mu_c": 0.6268015873015873, "calib/mu_w": 0.6465759999999999, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18725099601593626, "calib/std_conf": 0.21088690675307917, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1493.0, "completions/max_terminated_length": 1493.0, "completions/mean_length": 536.26953125, "completions/mean_terminated_length": 542.6284790039062, "completions/min_length": 0.0, "completions/min_terminated_length": 144.0, "epoch": 0.11733333333333333, "grad_norm": 0.22146645188331604, "learning_rate": 2.5e-06, "loss": -0.0046, "num_tokens": 24112948.0, "reward": 1.1484915018081665, "reward_std": 0.37460654973983765, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6642783880233765, "rewards/format_reward_step": 0.98046875, "step": 110 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.2171949545503047e-07, "aux_brier/mean_group_std": 0.1267809938259536, "aux_brier/mean_r": 0.7636379715682697, "aux_brier/n_active_tok": 313.875, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 78.46875, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.5169160487288136, "calib/avg_num_step_conf": 10.9140625, "calib/ece": 0.2302670731707318, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.95703125, "calib/frac_conf_gt_0.9": 0.15853658536585366, "calib/gap": 0.019075105932203407, "calib/mean_conf": 0.6572939024390244, "calib/mu_c": 0.66644375, "calib/mu_w": 0.6473686440677966, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18361788617886188, "calib/std_conf": 0.2449877352235704, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3012.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 628.02734375, "completions/mean_terminated_length": 632.972412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.1184, "grad_norm": 0.18761365115642548, "learning_rate": 2.4722222222222226e-06, "loss": 0.0441, "num_tokens": 24381131.0, "reward": 1.141373872756958, "reward_std": 0.3451157510280609, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6514326333999634, "rewards/format_reward_step": 0.95703125, "step": 111 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.043227629311417e-07, "aux_brier/mean_group_std": 0.13559656376496837, "aux_brier/mean_r": 0.7448649676055553, "aux_brier/n_active_tok": 318.0, "aux_brier/n_groups": 14.78125, "aux_brier/n_step_records": 79.5, "calib/answer_extract_rate": 0.9609375, "calib/auroc": 0.5378966692892735, "calib/avg_num_step_conf": 10.78125, "calib/ece": 0.2578825910931175, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.1902834008097166, "calib/gap": 0.036944105691056994, "calib/mean_conf": 0.7025222672064778, "calib/mu_c": 0.721069105691057, "calib/mu_w": 0.684125, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23121457489878552, "calib/std_conf": 0.2355327592464806, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0234375, "completions/max_length": 2640.0, "completions/max_terminated_length": 2640.0, "completions/mean_length": 619.5546875, "completions/mean_terminated_length": 634.4240112304688, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.11946666666666667, "grad_norm": 0.6164528131484985, "learning_rate": 2.4444444444444447e-06, "loss": -0.0143, "num_tokens": 24647657.0, "reward": 1.1225578784942627, "reward_std": 0.3095180094242096, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6464813947677612, "rewards/format_reward_step": 0.9609375, "step": 112 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.451427569942325e-08, "aux_brier/mean_group_std": 0.12511360426090087, "aux_brier/mean_r": 0.7727980755519827, "aux_brier/n_active_tok": 305.375, "aux_brier/n_groups": 14.125, "aux_brier/n_step_records": 76.34375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5654392764857882, "calib/avg_num_step_conf": 10.3515625, "calib/ece": 0.20167550200803208, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.18072289156626506, "calib/gap": 0.04605951550387588, "calib/mean_conf": 0.6883646586345382, "calib/mu_c": 0.7105620155038759, "calib/mu_w": 0.6645025, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18598393574297184, "calib/std_conf": 0.20727755705483025, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3038.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 571.96484375, "completions/mean_terminated_length": 578.7470703125, "completions/min_length": 0.0, "completions/min_terminated_length": 195.0, "epoch": 0.12053333333333334, "grad_norm": 0.04625099152326584, "learning_rate": 2.4166666666666667e-06, "loss": 0.0577, "num_tokens": 24899280.0, "reward": 1.1564178466796875, "reward_std": 0.36667174100875854, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6803587675094604, "rewards/format_reward_step": 0.96484375, "step": 113 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.7431586325252795e-08, "aux_brier/mean_group_std": 0.1271221744337986, "aux_brier/mean_r": 0.7559673133237643, "aux_brier/n_active_tok": 307.25, "aux_brier/n_groups": 15.0625, "aux_brier/n_step_records": 76.8125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.43421309872922775, "calib/avg_num_step_conf": 10.01171875, "calib/ece": 0.26276968503937015, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.2125984251968504, "calib/gap": -0.053843369175627154, "calib/mean_conf": 0.708253937007874, "calib/mu_c": 0.6872677419354839, "calib/mu_w": 0.741111111111111, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1803937007874016, "calib/std_conf": 0.20985623004249734, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1478.0, "completions/max_terminated_length": 1478.0, "completions/mean_length": 555.5625, "completions/mean_terminated_length": 557.7412109375, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.1216, "grad_norm": 0.29303163290023804, "learning_rate": 2.388888888888889e-06, "loss": -0.0013, "num_tokens": 25146528.0, "reward": 1.2709516286849976, "reward_std": 0.2983520030975342, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6775567531585693, "rewards/format_reward_step": 0.9921875, "step": 114 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.079227509639182e-07, "aux_brier/mean_group_std": 0.12487037779668066, "aux_brier/mean_r": 0.744679790723911, "aux_brier/n_active_tok": 311.375, "aux_brier/n_groups": 15.75, "aux_brier/n_step_records": 77.84375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5327266922094509, "calib/avg_num_step_conf": 10.03515625, "calib/ece": 0.2260509960159363, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.17928286852589642, "calib/gap": 0.022761749680715204, "calib/mean_conf": 0.7084509960159362, "calib/mu_c": 0.7189703703703704, "calib/mu_w": 0.6962086206896552, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1983266932270917, "calib/std_conf": 0.22390431567678465, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2628.0, "completions/max_terminated_length": 2628.0, "completions/mean_length": 572.94140625, "completions/mean_terminated_length": 575.1882934570312, "completions/min_length": 0.0, "completions/min_terminated_length": 175.0, "epoch": 0.12266666666666666, "grad_norm": 0.10321390628814697, "learning_rate": 2.361111111111111e-06, "loss": 0.0437, "num_tokens": 25398465.0, "reward": 1.1851181983947754, "reward_std": 0.3440912663936615, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6701600551605225, "rewards/format_reward_step": 0.98046875, "step": 115 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.879896527947402e-07, "aux_brier/mean_group_std": 0.11414278409933216, "aux_brier/mean_r": 0.728497607593749, "aux_brier/n_active_tok": 309.125, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 77.28125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.3849875851531165, "calib/avg_num_step_conf": 10.7578125, "calib/ece": 0.32947420634920643, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.24206349206349206, "calib/gap": -0.10577497294200022, "calib/mean_conf": 0.6637797619047618, "calib/mu_c": 0.6163489208633095, "calib/mu_w": 0.7221238938053097, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22083333333333344, "calib/std_conf": 0.27420488246311364, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2542.0, "completions/max_terminated_length": 2542.0, "completions/mean_length": 658.98046875, "completions/mean_terminated_length": 658.98046875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.12373333333333333, "grad_norm": 0.03025030344724655, "learning_rate": 2.3333333333333336e-06, "loss": 0.0737, "num_tokens": 25671684.0, "reward": 1.1835989952087402, "reward_std": 0.3307391405105591, "rewards/accuracy_reward_step": 0.54296875, "rewards/final_brier_reward_step": 0.60158371925354, "rewards/format_reward_step": 0.98046875, "step": 116 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.8606582269375949e-07, "aux_brier/mean_group_std": 0.11647272754337376, "aux_brier/mean_r": 0.7258551974674999, "aux_brier/n_active_tok": 312.125, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 78.03125, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.573046066252588, "calib/avg_num_step_conf": 11.3125, "calib/ece": 0.2797260000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.188, "calib/gap": 0.07941563146997932, "calib/mean_conf": 0.6972339999999999, "calib/mu_c": 0.7410714285714286, "calib/mu_w": 0.6616557971014493, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26448000000000005, "calib/std_conf": 0.24325610422762262, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 594.1640625, "completions/mean_terminated_length": 601.2095336914062, "completions/min_length": 0.0, "completions/min_terminated_length": 229.0, "epoch": 0.1248, "grad_norm": 0.26157084107398987, "learning_rate": 2.305555555555556e-06, "loss": 0.0291, "num_tokens": 25930390.0, "reward": 1.0866261720657349, "reward_std": 0.3682401478290558, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6511922478675842, "rewards/format_reward_step": 0.97265625, "step": 117 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.868508039050482e-09, "aux_brier/mean_group_std": 0.12191150511231218, "aux_brier/mean_r": 0.7398058479360671, "aux_brier/n_active_tok": 319.625, "aux_brier/n_groups": 16.46875, "aux_brier/n_step_records": 79.90625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.48419150858175247, "calib/avg_num_step_conf": 11.28515625, "calib/ece": 0.2560562248995984, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.21285140562248997, "calib/gap": 0.00477439024390236, "calib/mean_conf": 0.6751084337349398, "calib/mu_c": 0.6775243902439024, "calib/mu_w": 0.6727500000000001, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21859437751004013, "calib/std_conf": 0.24665382772640074, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 613.30859375, "completions/mean_terminated_length": 618.1378173828125, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 0.12586666666666665, "grad_norm": 0.03583335876464844, "learning_rate": 2.277777777777778e-06, "loss": 0.0085, "num_tokens": 26191405.0, "reward": 1.1269874572753906, "reward_std": 0.2839047610759735, "rewards/accuracy_reward_step": 0.48046875, "rewards/final_brier_reward_step": 0.6407626271247864, "rewards/format_reward_step": 0.97265625, "step": 118 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.695697490373021e-08, "aux_brier/mean_group_std": 0.12511687598469357, "aux_brier/mean_r": 0.7263048152072278, "aux_brier/n_active_tok": 328.25, "aux_brier/n_groups": 18.40625, "aux_brier/n_step_records": 82.0625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5402770483193277, "calib/avg_num_step_conf": 11.046875, "calib/ece": 0.25728629032258066, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.24193548387096775, "calib/gap": 0.023152310924369646, "calib/mean_conf": 0.7328749999999999, "calib/mu_c": 0.7433308823529411, "calib/mu_w": 0.7201785714285714, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2208870967741936, "calib/std_conf": 0.22405841624703374, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2838.0, "completions/max_terminated_length": 2838.0, "completions/mean_length": 650.296875, "completions/mean_terminated_length": 658.0079345703125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.12693333333333334, "grad_norm": 0.10443716496229172, "learning_rate": 2.25e-06, "loss": -0.0069, "num_tokens": 26462945.0, "reward": 1.184114933013916, "reward_std": 0.3726288676261902, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.658334493637085, "rewards/format_reward_step": 0.96875, "step": 119 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.4058681964750406e-09, "aux_brier/mean_group_std": 0.13934014127984432, "aux_brier/mean_r": 0.7520619817123914, "aux_brier/n_active_tok": 306.75, "aux_brier/n_groups": 14.9375, "aux_brier/n_step_records": 76.6875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5577572765072765, "calib/avg_num_step_conf": 10.25, "calib/ece": 0.18874206349206357, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.1746031746031746, "calib/gap": 0.029452182952182993, "calib/mean_conf": 0.6981626984126985, "calib/mu_c": 0.7103175675675676, "calib/mu_w": 0.6808653846153846, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14980158730158738, "calib/std_conf": 0.22008662462482884, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2139.0, "completions/max_terminated_length": 2139.0, "completions/mean_length": 567.890625, "completions/mean_terminated_length": 570.11767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 222.0, "epoch": 0.128, "grad_norm": 0.5418168902397156, "learning_rate": 2.222222222222222e-06, "loss": -0.0014, "num_tokens": 26715013.0, "reward": 1.2453269958496094, "reward_std": 0.3320023715496063, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.7000583410263062, "rewards/format_reward_step": 0.984375, "step": 120 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.724665293136642e-07, "aux_brier/mean_group_std": 0.12009707681668952, "aux_brier/mean_r": 0.7294469403023176, "aux_brier/n_active_tok": 298.875, "aux_brier/n_groups": 14.84375, "aux_brier/n_step_records": 74.71875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5350601946721311, "calib/avg_num_step_conf": 10.9140625, "calib/ece": 0.28112000000000015, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.236, "calib/gap": 0.028297899590164044, "calib/mean_conf": 0.6958, "calib/mu_c": 0.7096093750000001, "calib/mu_w": 0.681311475409836, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23246000000000017, "calib/std_conf": 0.26218945821676354, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2988.0, "completions/max_terminated_length": 2988.0, "completions/mean_length": 622.06640625, "completions/mean_terminated_length": 626.9645385742188, "completions/min_length": 0.0, "completions/min_terminated_length": 191.0, "epoch": 0.12906666666666666, "grad_norm": 0.0886792242527008, "learning_rate": 2.1944444444444445e-06, "loss": 0.0221, "num_tokens": 26979318.0, "reward": 1.149843454360962, "reward_std": 0.3843112289905548, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6462490558624268, "rewards/format_reward_step": 0.9765625, "step": 121 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.6662508107188074e-07, "aux_brier/mean_group_std": 0.10055198484079061, "aux_brier/mean_r": 0.7419666062335994, "aux_brier/n_active_tok": 291.875, "aux_brier/n_groups": 13.90625, "aux_brier/n_step_records": 72.96875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.47350591243208695, "calib/avg_num_step_conf": 9.6328125, "calib/ece": 0.24050511811023628, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.27165354330708663, "calib/gap": -0.0071516203259828215, "calib/mean_conf": 0.7447704724409449, "calib/mu_c": 0.7418140939597315, "calib/mu_w": 0.7489657142857143, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.1993307086614174, "calib/std_conf": 0.20916631186184603, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2165.0, "completions/max_terminated_length": 2165.0, "completions/mean_length": 575.5703125, "completions/mean_terminated_length": 577.8275146484375, "completions/min_length": 0.0, "completions/min_terminated_length": 242.0, "epoch": 0.13013333333333332, "grad_norm": 0.08715233951807022, "learning_rate": 2.166666666666667e-06, "loss": 0.0025, "num_tokens": 27234008.0, "reward": 1.2456527948379517, "reward_std": 0.2976229786872864, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6779236793518066, "rewards/format_reward_step": 0.98828125, "step": 122 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.3778430255361194e-07, "aux_brier/mean_group_std": 0.12481574598913961, "aux_brier/mean_r": 0.7195557555260736, "aux_brier/n_active_tok": 310.5, "aux_brier/n_groups": 16.71875, "aux_brier/n_step_records": 77.625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.44057377049180335, "calib/avg_num_step_conf": 10.64453125, "calib/ece": 0.30958333333333343, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.2976190476190476, "calib/gap": -0.018156998738965924, "calib/mean_conf": 0.7349404761904761, "calib/mu_c": 0.7255737704918033, "calib/mu_w": 0.7437307692307692, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.28019841269841284, "calib/std_conf": 0.23893502227512506, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2185.0, "completions/max_terminated_length": 2185.0, "completions/mean_length": 637.51953125, "completions/mean_terminated_length": 642.5393676757812, "completions/min_length": 0.0, "completions/min_terminated_length": 209.0, "epoch": 0.1312, "grad_norm": 0.26524844765663147, "learning_rate": 2.138888888888889e-06, "loss": 0.0073, "num_tokens": 27502501.0, "reward": 1.1216198205947876, "reward_std": 0.3030514419078827, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6114792823791504, "rewards/format_reward_step": 0.984375, "step": 123 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.874448450431501e-08, "aux_brier/mean_group_std": 0.12056207255091692, "aux_brier/mean_r": 0.7286804917728428, "aux_brier/n_active_tok": 296.375, "aux_brier/n_groups": 13.6875, "aux_brier/n_step_records": 74.09375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4627914723517655, "calib/avg_num_step_conf": 10.0, "calib/ece": 0.22895652173913045, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3201581027667984, "calib/gap": -0.028399733510992675, "calib/mean_conf": 0.7627905138339921, "calib/mu_c": 0.7521265822784811, "calib/mu_w": 0.7805263157894737, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18362055335968383, "calib/std_conf": 0.21598772012774484, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1863.0, "completions/max_terminated_length": 1863.0, "completions/mean_length": 579.78125, "completions/mean_terminated_length": 584.346435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 227.0, "epoch": 0.13226666666666667, "grad_norm": 0.047126758843660355, "learning_rate": 2.1111111111111114e-06, "loss": -0.0101, "num_tokens": 27757741.0, "reward": 1.2809193134307861, "reward_std": 0.2520931661128998, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.678365170955658, "rewards/format_reward_step": 0.98828125, "step": 124 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.2165703001417754e-07, "aux_brier/mean_group_std": 0.0958769819730208, "aux_brier/mean_r": 0.7294786087706503, "aux_brier/n_active_tok": 281.5, "aux_brier/n_groups": 13.75, "aux_brier/n_step_records": 70.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.45331276910224283, "calib/avg_num_step_conf": 10.3125, "calib/ece": 0.3159120000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.264, "calib/gap": -0.021969924812030084, "calib/mean_conf": 0.7176880000000001, "calib/mu_c": 0.7060000000000001, "calib/mu_w": 0.7279699248120302, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2828000000000001, "calib/std_conf": 0.22902425778943156, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2454.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 613.84765625, "completions/mean_terminated_length": 618.6810913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 178.0, "epoch": 0.13333333333333333, "grad_norm": 0.11766476184129715, "learning_rate": 2.0833333333333334e-06, "loss": 0.0049, "num_tokens": 28019694.0, "reward": 1.101876974105835, "reward_std": 0.34241873025894165, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.6106326580047607, "rewards/format_reward_step": 0.9765625, "step": 125 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -9.08536907928692e-08, "aux_brier/mean_group_std": 0.12238830568909123, "aux_brier/mean_r": 0.7182270958214281, "aux_brier/n_active_tok": 295.125, "aux_brier/n_groups": 14.6875, "aux_brier/n_step_records": 73.78125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.476510496671787, "calib/avg_num_step_conf": 9.7109375, "calib/ece": 0.30528000000000005, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.308, "calib/gap": 0.021370967741935365, "calib/mean_conf": 0.7644000000000001, "calib/mu_c": 0.7749999999999999, "calib/mu_w": 0.7536290322580645, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.2828400000000001, "calib/std_conf": 0.22342569234535226, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2664.0, "completions/max_terminated_length": 2664.0, "completions/mean_length": 586.80859375, "completions/mean_terminated_length": 591.4291381835938, "completions/min_length": 0.0, "completions/min_terminated_length": 210.0, "epoch": 0.1344, "grad_norm": 0.0369451642036438, "learning_rate": 2.0555555555555555e-06, "loss": 0.0068, "num_tokens": 28275381.0, "reward": 1.134810447692871, "reward_std": 0.31612884998321533, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.6251792907714844, "rewards/format_reward_step": 0.97265625, "step": 126 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.7130914514718043e-07, "aux_brier/mean_group_std": 0.1087601259225485, "aux_brier/mean_r": 0.7359164889442309, "aux_brier/n_active_tok": 298.75, "aux_brier/n_groups": 14.1875, "aux_brier/n_step_records": 74.6875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5479636591478696, "calib/avg_num_step_conf": 9.796875, "calib/ece": 0.25896561264822143, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2924901185770751, "calib/gap": 0.057599429824561454, "calib/mean_conf": 0.7517853754940711, "calib/mu_c": 0.7791052631578949, "calib/mu_w": 0.7215058333333334, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24252964426877477, "calib/std_conf": 0.22943804037496499, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2318.0, "completions/max_terminated_length": 2318.0, "completions/mean_length": 534.515625, "completions/mean_terminated_length": 538.7244262695312, "completions/min_length": 0.0, "completions/min_terminated_length": 162.0, "epoch": 0.13546666666666668, "grad_norm": 0.38190189003944397, "learning_rate": 2.027777777777778e-06, "loss": 0.0133, "num_tokens": 28515889.0, "reward": 1.1805983781814575, "reward_std": 0.32408082485198975, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6677061319351196, "rewards/format_reward_step": 0.98828125, "step": 127 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.6091981129324786e-08, "aux_brier/mean_group_std": 0.08898131700184603, "aux_brier/mean_r": 0.7498846271760894, "aux_brier/n_active_tok": 279.125, "aux_brier/n_groups": 14.71875, "aux_brier/n_step_records": 69.78125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5320246593121349, "calib/avg_num_step_conf": 9.08203125, "calib/ece": 0.28488755020080325, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.26104417670682734, "calib/gap": 0.03906696950032451, "calib/mean_conf": 0.7101325301204818, "calib/mu_c": 0.7311565217391306, "calib/mu_w": 0.6920895522388061, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26658634538152615, "calib/std_conf": 0.23951175761152257, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2806.0, "completions/max_terminated_length": 2806.0, "completions/mean_length": 557.421875, "completions/mean_terminated_length": 564.0316162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 165.0, "epoch": 0.13653333333333334, "grad_norm": 0.08702600747346878, "learning_rate": 2.0000000000000003e-06, "loss": 0.0082, "num_tokens": 28765253.0, "reward": 1.0940568447113037, "reward_std": 0.38162872195243835, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.634039580821991, "rewards/format_reward_step": 0.97265625, "step": 128 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.6765578878550365e-07, "aux_brier/mean_group_std": 0.11354217886692185, "aux_brier/mean_r": 0.7451862984720397, "aux_brier/n_active_tok": 273.5, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 68.375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5955418034857287, "calib/avg_num_step_conf": 8.9921875, "calib/ece": 0.1803921568627452, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.2901960784313726, "calib/gap": 0.0871558474362214, "calib/mean_conf": 0.7545098039215685, "calib/mu_c": 0.7910810810810811, "calib/mu_w": 0.7039252336448597, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17725490196078442, "calib/std_conf": 0.20507974274480467, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2692.0, "completions/max_terminated_length": 2692.0, "completions/mean_length": 536.171875, "completions/mean_terminated_length": 536.171875, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 0.1376, "grad_norm": 0.7112810611724854, "learning_rate": 1.9722222222222224e-06, "loss": 0.0176, "num_tokens": 29004897.0, "reward": 1.2570972442626953, "reward_std": 0.23593538999557495, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.723701536655426, "rewards/format_reward_step": 0.99609375, "step": 129 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.0008908032932418e-07, "aux_brier/mean_group_std": 0.1152837137003091, "aux_brier/mean_r": 0.7432447140400249, "aux_brier/n_active_tok": 275.625, "aux_brier/n_groups": 12.84375, "aux_brier/n_step_records": 68.90625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.57125, "calib/avg_num_step_conf": 9.0625, "calib/ece": 0.22098425196850408, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.25984251968503935, "calib/gap": 0.06677948717948723, "calib/mean_conf": 0.7105905511811024, "calib/mu_c": 0.7379333333333333, "calib/mu_w": 0.6711538461538461, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.17051181102362217, "calib/std_conf": 0.24288466056117392, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1883.0, "completions/max_terminated_length": 1883.0, "completions/mean_length": 515.00390625, "completions/mean_terminated_length": 519.05908203125, "completions/min_length": 0.0, "completions/min_terminated_length": 172.0, "epoch": 0.13866666666666666, "grad_norm": 0.07466782629489899, "learning_rate": 1.944444444444445e-06, "loss": -0.0274, "num_tokens": 29242026.0, "reward": 1.2599036693572998, "reward_std": 0.22464410960674286, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.7114894390106201, "rewards/format_reward_step": 0.9921875, "step": 130 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.091396594938182e-08, "aux_brier/mean_group_std": 0.08964348396018788, "aux_brier/mean_r": 0.7485890254713838, "aux_brier/n_active_tok": 265.5, "aux_brier/n_groups": 12.78125, "aux_brier/n_step_records": 66.375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5577432790609618, "calib/avg_num_step_conf": 8.72265625, "calib/ece": 0.31573122529644276, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.25691699604743085, "calib/gap": 0.036804871891960134, "calib/mean_conf": 0.7388142292490117, "calib/mu_c": 0.7590350877192982, "calib/mu_w": 0.722230215827338, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.30197628458498027, "calib/std_conf": 0.21122309234134398, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2725.0, "completions/max_terminated_length": 2725.0, "completions/mean_length": 518.0078125, "completions/mean_terminated_length": 520.0392456054688, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.13973333333333332, "grad_norm": 0.29209235310554504, "learning_rate": 1.916666666666667e-06, "loss": 0.007, "num_tokens": 29480844.0, "reward": 1.098313570022583, "reward_std": 0.2606309950351715, "rewards/accuracy_reward_step": 0.4453125, "rewards/final_brier_reward_step": 0.6354421973228455, "rewards/format_reward_step": 0.98828125, "step": 131 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.495046869613551e-08, "aux_brier/mean_group_std": 0.1333929284300119, "aux_brier/mean_r": 0.7491438215212559, "aux_brier/n_active_tok": 280.25, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 70.0625, "calib/answer_extract_rate": 1.0, "calib/auroc": 0.4191548582995951, "calib/avg_num_step_conf": 9.390625, "calib/ece": 0.27708593750000016, "calib/final_conf_rate": 1.0, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.265625, "calib/gap": -0.04846457489878542, "calib/mean_conf": 0.7235703124999999, "calib/mu_c": 0.7038815789473685, "calib/mu_w": 0.7523461538461539, "calib/nonempty_final_conf_rate": 1.0, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.20345312500000015, "calib/std_conf": 0.23251153142081435, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1347.0, "completions/max_terminated_length": 1347.0, "completions/mean_length": 530.34375, "completions/mean_terminated_length": 532.423583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 163.0, "epoch": 0.1408, "grad_norm": 0.0908280611038208, "learning_rate": 1.888888888888889e-06, "loss": 0.0276, "num_tokens": 29722204.0, "reward": 1.2570316791534424, "reward_std": 0.328461229801178, "rewards/accuracy_reward_step": 0.59375, "rewards/final_brier_reward_step": 0.66093909740448, "rewards/format_reward_step": 0.99609375, "step": 132 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.310082064335919e-08, "aux_brier/mean_group_std": 0.12002071878245187, "aux_brier/mean_r": 0.7273467181940331, "aux_brier/n_active_tok": 301.75, "aux_brier/n_groups": 15.1875, "aux_brier/n_step_records": 75.4375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.49354471871028166, "calib/avg_num_step_conf": 10.14453125, "calib/ece": 0.3808180000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.268, "calib/gap": 0.0077543982875109, "calib/mean_conf": 0.7183820000000001, "calib/mu_c": 0.7230656565656566, "calib/mu_w": 0.7153112582781457, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.35160000000000013, "calib/std_conf": 0.24897752323452813, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 618.140625, "completions/mean_terminated_length": 625.4703979492188, "completions/min_length": 0.0, "completions/min_terminated_length": 170.0, "epoch": 0.14186666666666667, "grad_norm": 0.03698096051812172, "learning_rate": 1.8611111111111113e-06, "loss": 0.0422, "num_tokens": 29986792.0, "reward": 1.0185657739639282, "reward_std": 0.3775428831577301, "rewards/accuracy_reward_step": 0.38671875, "rewards/final_brier_reward_step": 0.5820757150650024, "rewards/format_reward_step": 0.97265625, "step": 133 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.5884086419256676e-08, "aux_brier/mean_group_std": 0.11787864586886822, "aux_brier/mean_r": 0.7481217165348084, "aux_brier/n_active_tok": 291.875, "aux_brier/n_groups": 15.9375, "aux_brier/n_step_records": 72.96875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5220199092741935, "calib/avg_num_step_conf": 9.83203125, "calib/ece": 0.2597936507936509, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2261904761904762, "calib/gap": 0.026626008064516204, "calib/mean_conf": 0.7144920634920635, "calib/mu_c": 0.72759375, "calib/mu_w": 0.7009677419354838, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.23317460317460328, "calib/std_conf": 0.2360187222205034, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1992.0, "completions/max_terminated_length": 1992.0, "completions/mean_length": 605.64453125, "completions/mean_terminated_length": 608.0196533203125, "completions/min_length": 0.0, "completions/min_terminated_length": 225.0, "epoch": 0.14293333333333333, "grad_norm": 0.12758532166481018, "learning_rate": 1.8333333333333333e-06, "loss": 0.0279, "num_tokens": 30250789.0, "reward": 1.1529327630996704, "reward_std": 0.363974004983902, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6507935523986816, "rewards/format_reward_step": 0.98046875, "step": 134 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.978052592363724e-08, "aux_brier/mean_group_std": 0.09441600157997393, "aux_brier/mean_r": 0.7468353627329324, "aux_brier/n_active_tok": 289.5, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 72.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5035014005602241, "calib/avg_num_step_conf": 9.6796875, "calib/ece": 0.20294820717131482, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.2549800796812749, "calib/gap": 0.009689875950379845, "calib/mean_conf": 0.7113147410358567, "calib/mu_c": 0.7150980392156863, "calib/mu_w": 0.7054081632653064, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15235059760956182, "calib/std_conf": 0.23321472532710083, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2677.0, "completions/max_terminated_length": 2677.0, "completions/mean_length": 552.80859375, "completions/mean_terminated_length": 557.1614379882812, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.144, "grad_norm": 0.08034074306488037, "learning_rate": 1.8055555555555557e-06, "loss": 0.0111, "num_tokens": 30498188.0, "reward": 1.2638380527496338, "reward_std": 0.3435894846916199, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6881648302078247, "rewards/format_reward_step": 0.98046875, "step": 135 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.437431097847888e-07, "aux_brier/mean_group_std": 0.13127980531918174, "aux_brier/mean_r": 0.7838554624555897, "aux_brier/n_active_tok": 298.0, "aux_brier/n_groups": 15.5, "aux_brier/n_step_records": 74.5, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5721626733921816, "calib/avg_num_step_conf": 9.796875, "calib/ece": 0.23940277777777777, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.15873015873015872, "calib/gap": 0.058459331651954716, "calib/mean_conf": 0.7009940476190475, "calib/mu_c": 0.7311516393442623, "calib/mu_w": 0.6726923076923076, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22813492063492063, "calib/std_conf": 0.21794277355290823, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2238.0, "completions/max_terminated_length": 2238.0, "completions/mean_length": 556.41796875, "completions/mean_terminated_length": 560.7991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.14506666666666668, "grad_norm": 0.33265256881713867, "learning_rate": 1.777777777777778e-06, "loss": 0.0139, "num_tokens": 30749119.0, "reward": 1.1373050212860107, "reward_std": 0.3235567808151245, "rewards/accuracy_reward_step": 0.4765625, "rewards/final_brier_reward_step": 0.6742199063301086, "rewards/format_reward_step": 0.984375, "step": 136 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.116071700144275e-07, "aux_brier/mean_group_std": 0.12785028119366598, "aux_brier/mean_r": 0.7478586144411333, "aux_brier/n_active_tok": 297.375, "aux_brier/n_groups": 15.6875, "aux_brier/n_step_records": 74.34375, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5757954036233275, "calib/avg_num_step_conf": 9.64453125, "calib/ece": 0.28439000000000003, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.256, "calib/gap": 0.05277238973177134, "calib/mean_conf": 0.74691, "calib/mu_c": 0.7728740157480315, "calib/mu_w": 0.7201016260162602, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.26165000000000005, "calib/std_conf": 0.21356913845403788, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2983.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 576.25390625, "completions/mean_terminated_length": 576.25390625, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.14613333333333334, "grad_norm": 0.11480996757745743, "learning_rate": 1.75e-06, "loss": 0.0637, "num_tokens": 31003624.0, "reward": 1.1488656997680664, "reward_std": 0.3167296051979065, "rewards/accuracy_reward_step": 0.49609375, "rewards/final_brier_reward_step": 0.6579625606536865, "rewards/format_reward_step": 0.9765625, "step": 137 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.1831648958904317e-06, "aux_brier/mean_group_std": 0.10604086520736114, "aux_brier/mean_r": 0.7751440810207609, "aux_brier/n_active_tok": 285.75, "aux_brier/n_groups": 14.34375, "aux_brier/n_step_records": 71.4375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.458918774253981, "calib/avg_num_step_conf": 9.828125, "calib/ece": 0.21845849802371553, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.233201581027668, "calib/gap": -0.028429680181988326, "calib/mean_conf": 0.6979841897233201, "calib/mu_c": 0.6874213836477988, "calib/mu_w": 0.7158510638297871, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1439920948616602, "calib/std_conf": 0.22807903713595687, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2865.0, "completions/max_terminated_length": 2865.0, "completions/mean_length": 535.0078125, "completions/mean_terminated_length": 539.220458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.1472, "grad_norm": 0.6342697143554688, "learning_rate": 1.7222222222222224e-06, "loss": 0.0026, "num_tokens": 31244922.0, "reward": 1.2872869968414307, "reward_std": 0.3326365351676941, "rewards/accuracy_reward_step": 0.62109375, "rewards/final_brier_reward_step": 0.6882105469703674, "rewards/format_reward_step": 0.98828125, "step": 138 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.865244447593842e-08, "aux_brier/mean_group_std": 0.11965349470914817, "aux_brier/mean_r": 0.7836175999484832, "aux_brier/n_active_tok": 283.75, "aux_brier/n_groups": 14.625, "aux_brier/n_step_records": 70.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5152098575279168, "calib/avg_num_step_conf": 9.21484375, "calib/ece": 0.2117391304347826, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.21739130434782608, "calib/gap": 0.010970992170453275, "calib/mean_conf": 0.6911857707509882, "calib/mu_c": 0.6957823129251701, "calib/mu_w": 0.6848113207547168, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.16094861660079052, "calib/std_conf": 0.22697568490183898, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1989.0, "completions/max_terminated_length": 1989.0, "completions/mean_length": 524.66015625, "completions/mean_terminated_length": 526.7176513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 156.0, "epoch": 0.14826666666666666, "grad_norm": 0.03418288379907608, "learning_rate": 1.6944444444444446e-06, "loss": 0.0167, "num_tokens": 31482331.0, "reward": 1.2388288974761963, "reward_std": 0.2992714047431946, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6896905899047852, "rewards/format_reward_step": 0.984375, "step": 139 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.343148929941698e-07, "aux_brier/mean_group_std": 0.102011587847724, "aux_brier/mean_r": 0.7706437269626397, "aux_brier/n_active_tok": 267.125, "aux_brier/n_groups": 12.625, "aux_brier/n_step_records": 66.78125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4567003132613992, "calib/avg_num_step_conf": 8.57421875, "calib/ece": 0.22116732283464563, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2795275590551181, "calib/gap": -0.036213191785590015, "calib/mean_conf": 0.7223759842519686, "calib/mu_c": 0.7102573964497042, "calib/mu_w": 0.7464705882352942, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.13909448818897635, "calib/std_conf": 0.21748027644758106, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1719.0, "completions/max_terminated_length": 1719.0, "completions/mean_length": 539.32421875, "completions/mean_terminated_length": 541.4392700195312, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.14933333333333335, "grad_norm": 0.35845911502838135, "learning_rate": 1.6666666666666667e-06, "loss": 0.0268, "num_tokens": 31725414.0, "reward": 1.330304503440857, "reward_std": 0.30226558446884155, "rewards/accuracy_reward_step": 0.66015625, "rewards/final_brier_reward_step": 0.7040305137634277, "rewards/format_reward_step": 0.98828125, "step": 140 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.3600390038337462e-06, "aux_brier/mean_group_std": 0.104313854179082, "aux_brier/mean_r": 0.7551585907608853, "aux_brier/n_active_tok": 273.75, "aux_brier/n_groups": 13.25, "aux_brier/n_step_records": 68.4375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.633959101575595, "calib/avg_num_step_conf": 9.09375, "calib/ece": 0.14115079365079367, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.23412698412698413, "calib/gap": 0.11824404961448198, "calib/mean_conf": 0.6844047619047618, "calib/mu_c": 0.7289808917197452, "calib/mu_w": 0.6107368421052632, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1012698412698413, "calib/std_conf": 0.24850194590823912, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2951.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 594.125, "completions/mean_terminated_length": 601.1699829101562, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.1504, "grad_norm": 0.1141957938671112, "learning_rate": 1.638888888888889e-06, "loss": -0.0129, "num_tokens": 31984606.0, "reward": 1.2913074493408203, "reward_std": 0.31009572744369507, "rewards/accuracy_reward_step": 0.61328125, "rewards/final_brier_reward_step": 0.7433550953865051, "rewards/format_reward_step": 0.984375, "step": 141 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.3187952138958856e-07, "aux_brier/mean_group_std": 0.10804659931268239, "aux_brier/mean_r": 0.7643841751146132, "aux_brier/n_active_tok": 289.5, "aux_brier/n_groups": 14.09375, "aux_brier/n_step_records": 72.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.47524177949709856, "calib/avg_num_step_conf": 9.48046875, "calib/ece": 0.28179282868525896, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.250996015936255, "calib/gap": -0.012043197936815075, "calib/mean_conf": 0.7076892430278884, "calib/mu_c": 0.7024113475177305, "calib/mu_w": 0.7144545454545456, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.21386454183266934, "calib/std_conf": 0.24098618565618288, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2197.0, "completions/max_terminated_length": 2197.0, "completions/mean_length": 557.15625, "completions/mean_terminated_length": 561.5433349609375, "completions/min_length": 0.0, "completions/min_terminated_length": 169.0, "epoch": 0.15146666666666667, "grad_norm": 0.2936396896839142, "learning_rate": 1.6111111111111113e-06, "loss": -0.011, "num_tokens": 32232398.0, "reward": 1.2048792839050293, "reward_std": 0.22850966453552246, "rewards/accuracy_reward_step": 0.55078125, "rewards/final_brier_reward_step": 0.6554551124572754, "rewards/format_reward_step": 0.98046875, "step": 142 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.5166282119061236e-09, "aux_brier/mean_group_std": 0.12278076633034357, "aux_brier/mean_r": 0.7845885686918515, "aux_brier/n_active_tok": 294.25, "aux_brier/n_groups": 17.0, "aux_brier/n_step_records": 73.5625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5487562189054727, "calib/avg_num_step_conf": 9.4609375, "calib/ece": 0.2040354330708662, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.18503937007874016, "calib/gap": 0.05270211442786088, "calib/mean_conf": 0.6980118110236221, "calib/mu_c": 0.7229104477611941, "calib/mu_w": 0.6702083333333332, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18724409448818904, "calib/std_conf": 0.22331892526202243, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2741.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 560.63671875, "completions/mean_terminated_length": 562.8353271484375, "completions/min_length": 0.0, "completions/min_terminated_length": 201.0, "epoch": 0.15253333333333333, "grad_norm": 0.1971731185913086, "learning_rate": 1.5833333333333333e-06, "loss": 0.0093, "num_tokens": 32483257.0, "reward": 1.1926939487457275, "reward_std": 0.2808946371078491, "rewards/accuracy_reward_step": 0.5234375, "rewards/final_brier_reward_step": 0.6926510334014893, "rewards/format_reward_step": 0.9921875, "step": 143 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.047745739086615e-07, "aux_brier/mean_group_std": 0.11349697755479779, "aux_brier/mean_r": 0.751951976930463, "aux_brier/n_active_tok": 288.75, "aux_brier/n_groups": 14.5625, "aux_brier/n_step_records": 72.1875, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5052215825411701, "calib/avg_num_step_conf": 9.7109375, "calib/ece": 0.23928286852589642, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.2749003984063745, "calib/gap": 0.001300709599678762, "calib/mean_conf": 0.7060557768924304, "calib/mu_c": 0.7065584415584416, "calib/mu_w": 0.7052577319587628, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.9921875, "calib/pce": 0.16589641434262947, "calib/std_conf": 0.24177296006711663, "calib/step_conf_rate": 0.9921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2479.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 558.18359375, "completions/mean_terminated_length": 564.8023681640625, "completions/min_length": 0.0, "completions/min_terminated_length": 155.0, "epoch": 0.1536, "grad_norm": 0.10048002749681473, "learning_rate": 1.5555555555555558e-06, "loss": -0.0182, "num_tokens": 32730280.0, "reward": 1.2574422359466553, "reward_std": 0.3413987457752228, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.678206205368042, "rewards/format_reward_step": 0.97265625, "step": 144 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.063732889052595e-07, "aux_brier/mean_group_std": 0.13482206581994385, "aux_brier/mean_r": 0.728899507439789, "aux_brier/n_active_tok": 302.125, "aux_brier/n_groups": 15.34375, "aux_brier/n_step_records": 75.53125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4406591884780502, "calib/avg_num_step_conf": 9.7890625, "calib/ece": 0.23324110671936765, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2924901185770751, "calib/gap": -0.03391012325162723, "calib/mean_conf": 0.7278656126482212, "calib/mu_c": 0.7162048192771084, "calib/mu_w": 0.7501149425287357, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15249011857707517, "calib/std_conf": 0.23770183793614533, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1298.0, "completions/max_terminated_length": 1298.0, "completions/mean_length": 535.14453125, "completions/mean_terminated_length": 539.3582763671875, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.15466666666666667, "grad_norm": 0.17978081107139587, "learning_rate": 1.527777777777778e-06, "loss": -0.0044, "num_tokens": 32969981.0, "reward": 1.3148910999298096, "reward_std": 0.3562201261520386, "rewards/accuracy_reward_step": 0.6484375, "rewards/final_brier_reward_step": 0.6892519593238831, "rewards/format_reward_step": 0.98828125, "step": 145 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.7008681346686316e-07, "aux_brier/mean_group_std": 0.12852282145696792, "aux_brier/mean_r": 0.7500887148502667, "aux_brier/n_active_tok": 320.0, "aux_brier/n_groups": 16.71875, "aux_brier/n_step_records": 80.0, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4882333978078659, "calib/avg_num_step_conf": 10.546875, "calib/ece": 0.33517928286852594, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.29880478087649404, "calib/gap": -0.006728562217924172, "calib/mean_conf": 0.7341434262948207, "calib/mu_c": 0.7303636363636362, "calib/mu_w": 0.7370921985815604, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31553784860557776, "calib/std_conf": 0.2305570150696401, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2851.0, "completions/max_terminated_length": 2851.0, "completions/mean_length": 589.796875, "completions/mean_terminated_length": 596.79052734375, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 0.15573333333333333, "grad_norm": 0.060811351984739304, "learning_rate": 1.5e-06, "loss": 0.0383, "num_tokens": 33228185.0, "reward": 1.0732979774475098, "reward_std": 0.3476945459842682, "rewards/accuracy_reward_step": 0.43359375, "rewards/final_brier_reward_step": 0.5978792905807495, "rewards/format_reward_step": 0.98046875, "step": 146 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 8.878063501027711e-08, "aux_brier/mean_group_std": 0.11758874771701373, "aux_brier/mean_r": 0.7280103099137688, "aux_brier/n_active_tok": 305.375, "aux_brier/n_groups": 15.0625, "aux_brier/n_step_records": 76.34375, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.6231580919080919, "calib/avg_num_step_conf": 9.875, "calib/ece": 0.34956862745098044, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.3137254901960784, "calib/gap": 0.08958479020979015, "calib/mean_conf": 0.7591372549019608, "calib/mu_c": 0.8093750000000001, "calib/mu_w": 0.7197902097902099, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3347450980392157, "calib/std_conf": 0.22486954994935443, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1712.0, "completions/max_terminated_length": 1712.0, "completions/mean_length": 577.875, "completions/mean_terminated_length": 580.1412353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.1568, "grad_norm": 0.15195201337337494, "learning_rate": 1.4722222222222225e-06, "loss": 0.0478, "num_tokens": 33479801.0, "reward": 1.0961443185806274, "reward_std": 0.326327919960022, "rewards/accuracy_reward_step": 0.4375, "rewards/final_brier_reward_step": 0.6423898935317993, "rewards/format_reward_step": 0.99609375, "step": 147 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.185161645164364e-08, "aux_brier/mean_group_std": 0.1443634725519226, "aux_brier/mean_r": 0.7559351885308664, "aux_brier/n_active_tok": 293.5, "aux_brier/n_groups": 15.09375, "aux_brier/n_step_records": 73.375, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.5365727243278264, "calib/avg_num_step_conf": 9.41015625, "calib/ece": 0.1691071428571429, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.25, "calib/gap": 0.03961224489795945, "calib/mean_conf": 0.732202380952381, "calib/mu_c": 0.7487074829931973, "calib/mu_w": 0.7090952380952379, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1589880952380953, "calib/std_conf": 0.2014777024821994, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3030.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 577.28515625, "completions/mean_terminated_length": 579.549072265625, "completions/min_length": 0.0, "completions/min_terminated_length": 149.0, "epoch": 0.15786666666666666, "grad_norm": 0.4152239263057709, "learning_rate": 1.4444444444444445e-06, "loss": 0.0645, "num_tokens": 33732698.0, "reward": 1.2391388416290283, "reward_std": 0.3460821807384491, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.6987428665161133, "rewards/format_reward_step": 0.98046875, "step": 148 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.499285150838528e-07, "aux_brier/mean_group_std": 0.13527552986521668, "aux_brier/mean_r": 0.7384624378093363, "aux_brier/n_active_tok": 304.875, "aux_brier/n_groups": 15.5625, "aux_brier/n_step_records": 76.21875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5407376199118484, "calib/avg_num_step_conf": 10.30078125, "calib/ece": 0.2522489959839358, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.285140562248996, "calib/gap": 0.040521778584391765, "calib/mean_conf": 0.7263855421686747, "calib/mu_c": 0.7452631578947367, "calib/mu_w": 0.704741379310345, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.22224899598393583, "calib/std_conf": 0.2391530150557342, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 623.921875, "completions/mean_terminated_length": 628.8346557617188, "completions/min_length": 0.0, "completions/min_terminated_length": 158.0, "epoch": 0.15893333333333334, "grad_norm": 0.12097613513469696, "learning_rate": 1.4166666666666667e-06, "loss": 0.0581, "num_tokens": 33996878.0, "reward": 1.1659348011016846, "reward_std": 0.3879719078540802, "rewards/accuracy_reward_step": 0.51953125, "rewards/final_brier_reward_step": 0.6559269428253174, "rewards/format_reward_step": 0.96484375, "step": 149 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.6660795848499887e-07, "aux_brier/mean_group_std": 0.12032826164802157, "aux_brier/mean_r": 0.7387447603773472, "aux_brier/n_active_tok": 281.625, "aux_brier/n_groups": 15.375, "aux_brier/n_step_records": 70.40625, "calib/answer_extract_rate": 0.953125, "calib/auroc": 0.5689479060265576, "calib/avg_num_step_conf": 9.375, "calib/ece": 0.14641975308641983, "calib/final_conf_rate": 0.94921875, "calib/format_rate": 0.94921875, "calib/frac_conf_gt_0.9": 0.2551440329218107, "calib/gap": 0.06222092514227351, "calib/mean_conf": 0.7320164609053498, "calib/mu_c": 0.7548051948051948, "calib/mu_w": 0.6925842696629213, "calib/nonempty_final_conf_rate": 0.94921875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.12234567901234573, "calib/std_conf": 0.21270462850699287, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2751.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 546.6328125, "completions/mean_terminated_length": 548.7764892578125, "completions/min_length": 0.0, "completions/min_terminated_length": 167.0, "epoch": 0.16, "grad_norm": 0.10970129072666168, "learning_rate": 1.3888888888888892e-06, "loss": 0.0125, "num_tokens": 34241776.0, "reward": 1.2561277151107788, "reward_std": 0.34284496307373047, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.70419842004776, "rewards/format_reward_step": 0.94921875, "step": 150 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.444867650667874e-08, "aux_brier/mean_group_std": 0.15244814157287057, "aux_brier/mean_r": 0.7331580950992309, "aux_brier/n_active_tok": 331.875, "aux_brier/n_groups": 19.46875, "aux_brier/n_step_records": 82.96875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.500032535137949, "calib/avg_num_step_conf": 10.9609375, "calib/ece": 0.3265060240963856, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.2891566265060241, "calib/gap": 0.009864003123372989, "calib/mean_conf": 0.7433734939759037, "calib/mu_c": 0.7487610619469025, "calib/mu_w": 0.7388970588235295, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3080321285140563, "calib/std_conf": 0.2085228817879649, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3070.0, "completions/max_terminated_length": 3070.0, "completions/mean_length": 641.99609375, "completions/mean_terminated_length": 641.99609375, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.16106666666666666, "grad_norm": 0.057767193764448166, "learning_rate": 1.3611111111111112e-06, "loss": 0.0648, "num_tokens": 34513151.0, "reward": 1.0808541774749756, "reward_std": 0.4014917016029358, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6124788522720337, "rewards/format_reward_step": 0.97265625, "step": 151 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.6950521814228523e-09, "aux_brier/mean_group_std": 0.1271265112493043, "aux_brier/mean_r": 0.7831502268218242, "aux_brier/n_active_tok": 295.875, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 73.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5979885057471265, "calib/avg_num_step_conf": 9.65234375, "calib/ece": 0.21739043824701193, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.19123505976095617, "calib/gap": 0.08982503192848013, "calib/mean_conf": 0.6775498007968127, "calib/mu_c": 0.7258620689655172, "calib/mu_w": 0.6360370370370371, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.21639442231075698, "calib/std_conf": 0.2224305982230339, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2046.0, "completions/max_terminated_length": 2046.0, "completions/mean_length": 555.18359375, "completions/mean_terminated_length": 563.99609375, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.16213333333333332, "grad_norm": 0.15894092619419098, "learning_rate": 1.3333333333333334e-06, "loss": 0.0171, "num_tokens": 34760670.0, "reward": 1.1123087406158447, "reward_std": 0.38148149847984314, "rewards/accuracy_reward_step": 0.453125, "rewards/final_brier_reward_step": 0.6836100816726685, "rewards/format_reward_step": 0.9765625, "step": 152 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.9415360288238537e-07, "aux_brier/mean_group_std": 0.14774658666911658, "aux_brier/mean_r": 0.7723663608067817, "aux_brier/n_active_tok": 316.125, "aux_brier/n_groups": 16.1875, "aux_brier/n_step_records": 79.03125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.49931782744282754, "calib/avg_num_step_conf": 10.30859375, "calib/ece": 0.21603174603174613, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.26587301587301587, "calib/gap": 0.0055275467775469656, "calib/mean_conf": 0.7323809523809524, "calib/mu_c": 0.7346621621621623, "calib/mu_w": 0.7291346153846153, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18055555555555564, "calib/std_conf": 0.214621958415288, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2199.0, "completions/max_terminated_length": 2199.0, "completions/mean_length": 584.890625, "completions/mean_terminated_length": 589.4960327148438, "completions/min_length": 0.0, "completions/min_terminated_length": 212.0, "epoch": 0.1632, "grad_norm": 0.24331292510032654, "learning_rate": 1.3055555555555556e-06, "loss": -0.0241, "num_tokens": 35017722.0, "reward": 1.2409024238586426, "reward_std": 0.28328534960746765, "rewards/accuracy_reward_step": 0.578125, "rewards/final_brier_reward_step": 0.6823593974113464, "rewards/format_reward_step": 0.984375, "step": 153 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.731180373805042e-07, "aux_brier/mean_group_std": 0.1388109483574081, "aux_brier/mean_r": 0.7655757917974565, "aux_brier/n_active_tok": 305.625, "aux_brier/n_groups": 16.375, "aux_brier/n_step_records": 76.40625, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.6012010113780024, "calib/avg_num_step_conf": 9.89453125, "calib/ece": 0.3026450592885376, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.2648221343873518, "calib/gap": 0.06418975979772445, "calib/mean_conf": 0.7280268774703559, "calib/mu_c": 0.7635469026548672, "calib/mu_w": 0.6993571428571428, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2920158102766799, "calib/std_conf": 0.21420561635032018, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2036.0, "completions/max_terminated_length": 2036.0, "completions/mean_length": 549.5859375, "completions/mean_terminated_length": 553.9133911132812, "completions/min_length": 0.0, "completions/min_terminated_length": 203.0, "epoch": 0.16426666666666667, "grad_norm": 0.06564311683177948, "learning_rate": 1.2777777777777779e-06, "loss": 0.0272, "num_tokens": 35262856.0, "reward": 1.098493218421936, "reward_std": 0.3088676333427429, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.6517854928970337, "rewards/format_reward_step": 0.98828125, "step": 154 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.4338358681208483e-07, "aux_brier/mean_group_std": 0.12692619903557353, "aux_brier/mean_r": 0.7818858282035643, "aux_brier/n_active_tok": 279.25, "aux_brier/n_groups": 13.6875, "aux_brier/n_step_records": 69.8125, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5334280771131983, "calib/avg_num_step_conf": 8.8125, "calib/ece": 0.2544313725490196, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.21176470588235294, "calib/gap": 0.028876050420168164, "calib/mean_conf": 0.6697254901960785, "calib/mu_c": 0.6851260504201682, "calib/mu_w": 0.65625, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22874509803921572, "calib/std_conf": 0.22393347037495637, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 507.01953125, "completions/mean_terminated_length": 509.00787353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 186.0, "epoch": 0.16533333333333333, "grad_norm": 0.034964244812726974, "learning_rate": 1.25e-06, "loss": 0.0088, "num_tokens": 35499869.0, "reward": 1.1278388500213623, "reward_std": 0.3299586772918701, "rewards/accuracy_reward_step": 0.46484375, "rewards/final_brier_reward_step": 0.667605459690094, "rewards/format_reward_step": 0.9921875, "step": 155 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.5496790095891342e-08, "aux_brier/mean_group_std": 0.12602172269006534, "aux_brier/mean_r": 0.7435304407654402, "aux_brier/n_active_tok": 299.5, "aux_brier/n_groups": 15.875, "aux_brier/n_step_records": 74.875, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5492846788933006, "calib/avg_num_step_conf": 9.71875, "calib/ece": 0.2830555555555557, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.30158730158730157, "calib/gap": 0.046642087351105976, "calib/mean_conf": 0.7194047619047619, "calib/mu_c": 0.7421705426356588, "calib/mu_w": 0.6955284552845529, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2452777777777779, "calib/std_conf": 0.24476268644492558, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2472.0, "completions/max_terminated_length": 2472.0, "completions/mean_length": 547.55078125, "completions/mean_terminated_length": 556.2420654296875, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 0.1664, "grad_norm": 0.0664546936750412, "learning_rate": 1.2222222222222223e-06, "loss": -0.0023, "num_tokens": 35744802.0, "reward": 1.1587910652160645, "reward_std": 0.30581116676330566, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6586019396781921, "rewards/format_reward_step": 0.98046875, "step": 156 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.444379716109001e-07, "aux_brier/mean_group_std": 0.14657826141004715, "aux_brier/mean_r": 0.7612826136247497, "aux_brier/n_active_tok": 306.625, "aux_brier/n_groups": 15.25, "aux_brier/n_step_records": 76.65625, "calib/answer_extract_rate": 0.99609375, "calib/auroc": 0.5845720302242041, "calib/avg_num_step_conf": 10.171875, "calib/ece": 0.2230588235294118, "calib/final_conf_rate": 0.99609375, "calib/format_rate": 0.99609375, "calib/frac_conf_gt_0.9": 0.24705882352941178, "calib/gap": 0.0801096246748424, "calib/mean_conf": 0.712156862745098, "calib/mu_c": 0.7489130434782609, "calib/mu_w": 0.6688034188034185, "calib/nonempty_final_conf_rate": 0.99609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1970196078431373, "calib/std_conf": 0.22981931287546264, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1344.0, "completions/max_terminated_length": 1344.0, "completions/mean_length": 552.31640625, "completions/mean_terminated_length": 554.4823608398438, "completions/min_length": 0.0, "completions/min_terminated_length": 192.0, "epoch": 0.16746666666666668, "grad_norm": 0.05387997254729271, "learning_rate": 1.1944444444444446e-06, "loss": -0.0296, "num_tokens": 35989923.0, "reward": 1.213773488998413, "reward_std": 0.28469476103782654, "rewards/accuracy_reward_step": 0.5390625, "rewards/final_brier_reward_step": 0.7066562175750732, "rewards/format_reward_step": 0.99609375, "step": 157 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.475260010402879e-07, "aux_brier/mean_group_std": 0.12277746991169511, "aux_brier/mean_r": 0.7717797308190276, "aux_brier/n_active_tok": 301.375, "aux_brier/n_groups": 15.84375, "aux_brier/n_step_records": 75.34375, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.5251703518286748, "calib/avg_num_step_conf": 10.12109375, "calib/ece": 0.21101214574898786, "calib/final_conf_rate": 0.96484375, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.2388663967611336, "calib/gap": 0.026731330830204514, "calib/mean_conf": 0.6919838056680162, "calib/mu_c": 0.702156862745098, "calib/mu_w": 0.6754255319148935, "calib/nonempty_final_conf_rate": 0.96484375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.14178137651821865, "calib/std_conf": 0.23126240363034653, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2964.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 556.265625, "completions/mean_terminated_length": 567.3466186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 171.0, "epoch": 0.16853333333333334, "grad_norm": 0.062464676797389984, "learning_rate": 1.1666666666666668e-06, "loss": 0.0426, "num_tokens": 36237567.0, "reward": 1.2532968521118164, "reward_std": 0.32989680767059326, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.6928750276565552, "rewards/format_reward_step": 0.96484375, "step": 158 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.0174982468554887e-07, "aux_brier/mean_group_std": 0.1261045529526276, "aux_brier/mean_r": 0.7995295352517525, "aux_brier/n_active_tok": 284.375, "aux_brier/n_groups": 14.46875, "aux_brier/n_step_records": 71.09375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.4827797880589299, "calib/avg_num_step_conf": 9.01953125, "calib/ece": 0.20079365079365083, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.17857142857142858, "calib/gap": -0.014422331351770623, "calib/mean_conf": 0.6492857142857144, "calib/mu_c": 0.6432191780821918, "calib/mu_w": 0.6576415094339624, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.13535714285714293, "calib/std_conf": 0.2180657341241236, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2918.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 518.59765625, "completions/mean_terminated_length": 522.6810913085938, "completions/min_length": 0.0, "completions/min_terminated_length": 64.0, "epoch": 0.1696, "grad_norm": 0.03297615423798561, "learning_rate": 1.138888888888889e-06, "loss": 0.0159, "num_tokens": 36475112.0, "reward": 1.2339849472045898, "reward_std": 0.29382210969924927, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.6859399080276489, "rewards/format_reward_step": 0.984375, "step": 159 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.561264022653113e-07, "aux_brier/mean_group_std": 0.12557265117017866, "aux_brier/mean_r": 0.7428592234899737, "aux_brier/n_active_tok": 308.375, "aux_brier/n_groups": 14.90625, "aux_brier/n_step_records": 77.09375, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5450541152781273, "calib/avg_num_step_conf": 9.72265625, "calib/ece": 0.2701581027667984, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.3557312252964427, "calib/gap": 0.052652907123080794, "calib/mean_conf": 0.7464426877470356, "calib/mu_c": 0.7705839416058393, "calib/mu_w": 0.7179310344827585, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2375494071146245, "calib/std_conf": 0.23149965143070703, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1949.0, "completions/max_terminated_length": 1949.0, "completions/mean_length": 552.23828125, "completions/mean_terminated_length": 552.23828125, "completions/min_length": 201.0, "completions/min_terminated_length": 201.0, "epoch": 0.17066666666666666, "grad_norm": 0.19734477996826172, "learning_rate": 1.111111111111111e-06, "loss": 0.0347, "num_tokens": 36721325.0, "reward": 1.197866678237915, "reward_std": 0.2875242829322815, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6742792725563049, "rewards/format_reward_step": 0.98828125, "step": 160 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.4726692812726547e-07, "aux_brier/mean_group_std": 0.12691666415635272, "aux_brier/mean_r": 0.7496726792116155, "aux_brier/n_active_tok": 279.375, "aux_brier/n_groups": 13.1875, "aux_brier/n_step_records": 69.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5176443275034824, "calib/avg_num_step_conf": 9.2265625, "calib/ece": 0.20284584980237164, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.28063241106719367, "calib/gap": 0.019548057576226685, "calib/mean_conf": 0.7101185770750988, "calib/mu_c": 0.7156043956043957, "calib/mu_w": 0.696056338028169, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.0967984189723321, "calib/std_conf": 0.24223185282794146, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2200.0, "completions/max_terminated_length": 2200.0, "completions/mean_length": 531.47265625, "completions/mean_terminated_length": 533.556884765625, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.17173333333333332, "grad_norm": 0.10202208906412125, "learning_rate": 1.0833333333333335e-06, "loss": 0.0289, "num_tokens": 36961302.0, "reward": 1.3936083316802979, "reward_std": 0.2275093048810959, "rewards/accuracy_reward_step": 0.71484375, "rewards/final_brier_reward_step": 0.7384960651397705, "rewards/format_reward_step": 0.98828125, "step": 161 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.2315262043482988e-07, "aux_brier/mean_group_std": 0.12375735595768081, "aux_brier/mean_r": 0.7267927646386473, "aux_brier/n_active_tok": 293.75, "aux_brier/n_groups": 14.3125, "aux_brier/n_step_records": 73.4375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.4668095801301005, "calib/avg_num_step_conf": 9.515625, "calib/ece": 0.21763779527559055, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.31496062992125984, "calib/gap": -0.01667356593731506, "calib/mean_conf": 0.7501574803149607, "calib/mu_c": 0.7451685393258428, "calib/mu_w": 0.7618421052631579, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.133503937007874, "calib/std_conf": 0.2269013599969254, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2190.0, "completions/max_terminated_length": 2190.0, "completions/mean_length": 535.77734375, "completions/mean_terminated_length": 537.8784790039062, "completions/min_length": 0.0, "completions/min_terminated_length": 174.0, "epoch": 0.1728, "grad_norm": 0.08193613588809967, "learning_rate": 1.0555555555555557e-06, "loss": -0.0006, "num_tokens": 37202605.0, "reward": 1.372331976890564, "reward_std": 0.2566692531108856, "rewards/accuracy_reward_step": 0.6953125, "rewards/final_brier_reward_step": 0.723703145980835, "rewards/format_reward_step": 0.9921875, "step": 162 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.674164930049905e-07, "aux_brier/mean_group_std": 0.13808864230548526, "aux_brier/mean_r": 0.7291608496776432, "aux_brier/n_active_tok": 314.25, "aux_brier/n_groups": 16.5625, "aux_brier/n_step_records": 78.5625, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5460588793922128, "calib/avg_num_step_conf": 10.39453125, "calib/ece": 0.26658730158730165, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.28174603174603174, "calib/gap": 0.05227350427350419, "calib/mean_conf": 0.7570634920634921, "calib/mu_c": 0.7813333333333332, "calib/mu_w": 0.729059829059829, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.24396825396825406, "calib/std_conf": 0.21383522606265276, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2636.0, "completions/max_terminated_length": 2636.0, "completions/mean_length": 597.42578125, "completions/mean_terminated_length": 599.7686767578125, "completions/min_length": 0.0, "completions/min_terminated_length": 166.0, "epoch": 0.17386666666666667, "grad_norm": 0.14409030973911285, "learning_rate": 1.0277777777777777e-06, "loss": 0.0219, "num_tokens": 37460378.0, "reward": 1.187504529953003, "reward_std": 0.31250619888305664, "rewards/accuracy_reward_step": 0.52734375, "rewards/final_brier_reward_step": 0.6718930006027222, "rewards/format_reward_step": 0.984375, "step": 163 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.507768979258088e-08, "aux_brier/mean_group_std": 0.13708992804717865, "aux_brier/mean_r": 0.7332738047370835, "aux_brier/n_active_tok": 324.25, "aux_brier/n_groups": 18.0, "aux_brier/n_step_records": 81.0625, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5002587656876698, "calib/avg_num_step_conf": 10.48828125, "calib/ece": 0.24116465863453815, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.3253012048192771, "calib/gap": 0.025842929227584577, "calib/mean_conf": 0.7524096385542168, "calib/mu_c": 0.7646564885496183, "calib/mu_w": 0.7388135593220337, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23373493975903614, "calib/std_conf": 0.21775741095903073, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2839.0, "completions/max_terminated_length": 2839.0, "completions/mean_length": 621.26171875, "completions/mean_terminated_length": 626.153564453125, "completions/min_length": 0.0, "completions/min_terminated_length": 272.0, "epoch": 0.17493333333333333, "grad_norm": 0.20858323574066162, "learning_rate": 1.0000000000000002e-06, "loss": 0.0054, "num_tokens": 37725557.0, "reward": 1.1577627658843994, "reward_std": 0.38337188959121704, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6466757655143738, "rewards/format_reward_step": 0.96875, "step": 164 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.474337839457167e-08, "aux_brier/mean_group_std": 0.13658670486017807, "aux_brier/mean_r": 0.7416463819840573, "aux_brier/n_active_tok": 316.125, "aux_brier/n_groups": 15.875, "aux_brier/n_step_records": 79.03125, "calib/answer_extract_rate": 0.98046875, "calib/auroc": 0.4655030149158997, "calib/avg_num_step_conf": 10.32421875, "calib/ece": 0.3632738095238095, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.2896825396825397, "calib/gap": -0.024930815614090718, "calib/mean_conf": 0.7630753968253969, "calib/mu_c": 0.7495217391304347, "calib/mu_w": 0.7744525547445255, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.33499999999999996, "calib/std_conf": 0.20081423113522068, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2180.0, "completions/max_terminated_length": 2180.0, "completions/mean_length": 593.52734375, "completions/mean_terminated_length": 593.52734375, "completions/min_length": 243.0, "completions/min_terminated_length": 243.0, "epoch": 0.176, "grad_norm": 0.3347119092941284, "learning_rate": 9.722222222222224e-07, "loss": 0.0236, "num_tokens": 37983076.0, "reward": 1.0855225324630737, "reward_std": 0.38842636346817017, "rewards/accuracy_reward_step": 0.44921875, "rewards/final_brier_reward_step": 0.5920901298522949, "rewards/format_reward_step": 0.9765625, "step": 165 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -2.660470514170843e-07, "aux_brier/mean_group_std": 0.14491428261953823, "aux_brier/mean_r": 0.7301434048541713, "aux_brier/n_active_tok": 306.625, "aux_brier/n_groups": 15.0625, "aux_brier/n_step_records": 76.65625, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.4630952380952381, "calib/avg_num_step_conf": 10.3203125, "calib/ece": 0.3020080321285141, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.321285140562249, "calib/gap": -0.020974206349206215, "calib/mean_conf": 0.7371084337349397, "calib/mu_c": 0.7282638888888889, "calib/mu_w": 0.7492380952380951, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2304016064257029, "calib/std_conf": 0.24145407964968105, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2744.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 628.375, "completions/mean_terminated_length": 633.3228149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 236.0, "epoch": 0.17706666666666668, "grad_norm": 0.06704939156770706, "learning_rate": 9.444444444444445e-07, "loss": 0.0287, "num_tokens": 38250124.0, "reward": 1.2075164318084717, "reward_std": 0.31614387035369873, "rewards/accuracy_reward_step": 0.5625, "rewards/final_brier_reward_step": 0.6425660252571106, "rewards/format_reward_step": 0.96875, "step": 166 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.5604936076663023e-07, "aux_brier/mean_group_std": 0.11707872614701348, "aux_brier/mean_r": 0.7300927916334877, "aux_brier/n_active_tok": 304.0, "aux_brier/n_groups": 16.125, "aux_brier/n_step_records": 76.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5730337078651686, "calib/avg_num_step_conf": 9.83984375, "calib/ece": 0.20295275590551187, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4015748031496063, "calib/gap": 0.046166155941436804, "calib/mean_conf": 0.7659448818897638, "calib/mu_c": 0.7821212121212122, "calib/mu_w": 0.7359550561797754, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.15964566929133864, "calib/std_conf": 0.2396677755379001, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2608.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 578.671875, "completions/mean_terminated_length": 580.9412231445312, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.17813333333333334, "grad_norm": 0.1026303693652153, "learning_rate": 9.166666666666666e-07, "loss": -0.0013, "num_tokens": 38503872.0, "reward": 1.319819688796997, "reward_std": 0.34465086460113525, "rewards/accuracy_reward_step": 0.64453125, "rewards/final_brier_reward_step": 0.7167792916297913, "rewards/format_reward_step": 0.9921875, "step": 167 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.94002819172668e-07, "aux_brier/mean_group_std": 0.12431108511041629, "aux_brier/mean_r": 0.6756198881951676, "aux_brier/n_active_tok": 330.625, "aux_brier/n_groups": 18.75, "aux_brier/n_step_records": 82.65625, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4620420536635707, "calib/avg_num_step_conf": 10.984375, "calib/ece": 0.30474199999999996, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.464, "calib/gap": 0.021328173374613124, "calib/mean_conf": 0.7931419999999999, "calib/mu_c": 0.8028676470588235, "calib/mu_w": 0.7815394736842104, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 0.99609375, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.276942, "calib/std_conf": 0.24036952559756822, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2910.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 629.79296875, "completions/mean_terminated_length": 639.7897338867188, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.1792, "grad_norm": 0.1148962751030922, "learning_rate": 8.88888888888889e-07, "loss": -0.0001, "num_tokens": 38769771.0, "reward": 1.1782002449035645, "reward_std": 0.40775153040885925, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.6268634796142578, "rewards/format_reward_step": 0.97265625, "step": 168 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -6.828714768092325e-07, "aux_brier/mean_group_std": 0.11830183188373451, "aux_brier/mean_r": 0.7243447293174061, "aux_brier/n_active_tok": 310.5, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 77.625, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5558035714285715, "calib/avg_num_step_conf": 9.76171875, "calib/ece": 0.25236220472440957, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.3779527559055118, "calib/gap": 0.03988556338028182, "calib/mean_conf": 0.7741732283464566, "calib/mu_c": 0.7917605633802818, "calib/mu_w": 0.751875, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2337401574803151, "calib/std_conf": 0.2037075290545609, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1996.0, "completions/max_terminated_length": 1996.0, "completions/mean_length": 572.15234375, "completions/mean_terminated_length": 574.3961181640625, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 0.18026666666666666, "grad_norm": 0.33049607276916504, "learning_rate": 8.611111111111112e-07, "loss": 0.0462, "num_tokens": 39020426.0, "reward": 1.220787525177002, "reward_std": 0.2584887146949768, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6800249814987183, "rewards/format_reward_step": 0.9921875, "step": 169 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.123947742850186e-07, "aux_brier/mean_group_std": 0.11455481599942005, "aux_brier/mean_r": 0.6839772577664597, "aux_brier/n_active_tok": 307.375, "aux_brier/n_groups": 14.09375, "aux_brier/n_step_records": 76.84375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5454807692307692, "calib/avg_num_step_conf": 9.8359375, "calib/ece": 0.24874015748031492, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4921259842519685, "calib/gap": 0.05461538461538462, "calib/mean_conf": 0.8076377952755907, "calib/mu_c": 0.83, "calib/mu_w": 0.7753846153846153, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23291338582677162, "calib/std_conf": 0.2130799882817784, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2209.0, "completions/max_terminated_length": 2209.0, "completions/mean_length": 594.58203125, "completions/mean_terminated_length": 596.9137573242188, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.18133333333333335, "grad_norm": 0.06769291311502457, "learning_rate": 8.333333333333333e-07, "loss": 0.0137, "num_tokens": 39276791.0, "reward": 1.2513952255249023, "reward_std": 0.30168139934539795, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.685268759727478, "rewards/format_reward_step": 0.98828125, "step": 170 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.0209773233674797e-07, "aux_brier/mean_group_std": 0.1321099699793983, "aux_brier/mean_r": 0.7141771864276769, "aux_brier/n_active_tok": 298.375, "aux_brier/n_groups": 14.34375, "aux_brier/n_step_records": 74.59375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5144159226190477, "calib/avg_num_step_conf": 9.51171875, "calib/ece": 0.31582677165354334, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.35826771653543305, "calib/gap": 0.008085317460317376, "calib/mean_conf": 0.7766141732283466, "calib/mu_c": 0.7806249999999999, "calib/mu_w": 0.7725396825396825, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29425196850393703, "calib/std_conf": 0.20587329056393774, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2359.0, "completions/max_terminated_length": 2359.0, "completions/mean_length": 561.015625, "completions/mean_terminated_length": 561.015625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.1824, "grad_norm": 0.04003489390015602, "learning_rate": 8.055555555555557e-07, "loss": 0.0353, "num_tokens": 39527307.0, "reward": 1.154179334640503, "reward_std": 0.30687057971954346, "rewards/accuracy_reward_step": 0.5, "rewards/final_brier_reward_step": 0.6323422193527222, "rewards/format_reward_step": 0.9921875, "step": 171 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.545451762442588e-07, "aux_brier/mean_group_std": 0.10414918437127466, "aux_brier/mean_r": 0.712658002624457, "aux_brier/n_active_tok": 295.0, "aux_brier/n_groups": 14.03125, "aux_brier/n_step_records": 73.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.530218632502437, "calib/avg_num_step_conf": 9.859375, "calib/ece": 0.22098814229249014, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.41106719367588934, "calib/gap": 0.014676925219328618, "calib/mean_conf": 0.8118972332015809, "calib/mu_c": 0.8168862275449102, "calib/mu_w": 0.8022093023255816, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.18640316205533602, "calib/std_conf": 0.18762810462326462, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1814.0, "completions/max_terminated_length": 1814.0, "completions/mean_length": 540.671875, "completions/mean_terminated_length": 547.0830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 173.0, "epoch": 0.18346666666666667, "grad_norm": 0.09831096976995468, "learning_rate": 7.777777777777779e-07, "loss": -0.0324, "num_tokens": 39769071.0, "reward": 1.3253530263900757, "reward_std": 0.3200302720069885, "rewards/accuracy_reward_step": 0.65234375, "rewards/final_brier_reward_step": 0.7154746055603027, "rewards/format_reward_step": 0.98828125, "step": 172 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.227553016602272e-07, "aux_brier/mean_group_std": 0.12887376354818048, "aux_brier/mean_r": 0.6881525563024199, "aux_brier/n_active_tok": 323.375, "aux_brier/n_groups": 17.125, "aux_brier/n_step_records": 80.84375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5216328924219716, "calib/avg_num_step_conf": 10.828125, "calib/ece": 0.2659126984126984, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.4365079365079365, "calib/gap": 0.0339981755391936, "calib/mean_conf": 0.7893253968253968, "calib/mu_c": 0.8032214765100671, "calib/mu_w": 0.7692233009708735, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23198412698412693, "calib/std_conf": 0.23270310566631314, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2009.0, "completions/max_terminated_length": 2009.0, "completions/mean_length": 627.79296875, "completions/mean_terminated_length": 630.2549438476562, "completions/min_length": 0.0, "completions/min_terminated_length": 148.0, "epoch": 0.18453333333333333, "grad_norm": 0.17116612195968628, "learning_rate": 7.5e-07, "loss": 0.0181, "num_tokens": 40032946.0, "reward": 1.2438373565673828, "reward_std": 0.37067359685897827, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6706616878509521, "rewards/format_reward_step": 0.98046875, "step": 173 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 5.549296602791465e-08, "aux_brier/mean_group_std": 0.13978389570912286, "aux_brier/mean_r": 0.727931955577553, "aux_brier/n_active_tok": 323.875, "aux_brier/n_groups": 18.84375, "aux_brier/n_step_records": 80.96875, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.6030387818844352, "calib/avg_num_step_conf": 10.53515625, "calib/ece": 0.35875502008032134, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.37751004016064255, "calib/gap": 0.06310970848516395, "calib/mean_conf": 0.7878313253012049, "calib/mu_c": 0.8223008849557522, "calib/mu_w": 0.7591911764705882, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3463855421686748, "calib/std_conf": 0.20829802671746114, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2830.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 668.83203125, "completions/mean_terminated_length": 674.0984497070312, "completions/min_length": 0.0, "completions/min_terminated_length": 215.0, "epoch": 0.1856, "grad_norm": 0.26240843534469604, "learning_rate": 7.222222222222222e-07, "loss": 0.0223, "num_tokens": 40308399.0, "reward": 1.0805542469024658, "reward_std": 0.3596838414669037, "rewards/accuracy_reward_step": 0.44140625, "rewards/final_brier_reward_step": 0.611279308795929, "rewards/format_reward_step": 0.97265625, "step": 174 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.8007512625771724e-07, "aux_brier/mean_group_std": 0.1390635787546291, "aux_brier/mean_r": 0.7354133986520142, "aux_brier/n_active_tok": 302.875, "aux_brier/n_groups": 15.28125, "aux_brier/n_step_records": 75.71875, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5173918624053395, "calib/avg_num_step_conf": 9.78125, "calib/ece": 0.3390909090909091, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.28063241106719367, "calib/gap": 0.04196187909125926, "calib/mean_conf": 0.7253359683794466, "calib/mu_c": 0.7497169811320756, "calib/mu_w": 0.7077551020408164, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.32272727272727275, "calib/std_conf": 0.25181472926784115, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2881.0, "completions/max_terminated_length": 2881.0, "completions/mean_length": 596.90234375, "completions/mean_terminated_length": 596.90234375, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.18666666666666668, "grad_norm": 0.18161000311374664, "learning_rate": 6.944444444444446e-07, "loss": 0.0239, "num_tokens": 40567030.0, "reward": 1.06131911277771, "reward_std": 0.3406786620616913, "rewards/accuracy_reward_step": 0.4140625, "rewards/final_brier_reward_step": 0.612463653087616, "rewards/format_reward_step": 0.98828125, "step": 175 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.1970234808109677e-07, "aux_brier/mean_group_std": 0.13920629487457764, "aux_brier/mean_r": 0.7091455783349397, "aux_brier/n_active_tok": 320.0, "aux_brier/n_groups": 17.875, "aux_brier/n_step_records": 80.0, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5805806451612903, "calib/avg_num_step_conf": 10.58203125, "calib/ece": 0.32654618473895586, "calib/final_conf_rate": 0.97265625, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.39357429718875503, "calib/gap": 0.04724322580645157, "calib/mean_conf": 0.8083132530120483, "calib/mu_c": 0.8318399999999999, "calib/mu_w": 0.7845967741935483, "calib/nonempty_final_conf_rate": 0.97265625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.31642570281124505, "calib/std_conf": 0.20189157261617924, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3014.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 601.2890625, "completions/mean_terminated_length": 606.0236206054688, "completions/min_length": 0.0, "completions/min_terminated_length": 129.0, "epoch": 0.18773333333333334, "grad_norm": 0.3984068036079407, "learning_rate": 6.666666666666667e-07, "loss": -0.0087, "num_tokens": 40825024.0, "reward": 1.1270815134048462, "reward_std": 0.3631778657436371, "rewards/accuracy_reward_step": 0.48828125, "rewards/final_brier_reward_step": 0.6177011728286743, "rewards/format_reward_step": 0.96875, "step": 176 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.4940496025648304e-07, "aux_brier/mean_group_std": 0.12208929355906557, "aux_brier/mean_r": 0.7155938312015591, "aux_brier/n_active_tok": 313.5, "aux_brier/n_groups": 16.5625, "aux_brier/n_step_records": 78.375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5093722201042065, "calib/avg_num_step_conf": 10.19921875, "calib/ece": 0.28416733067729083, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.30677290836653387, "calib/gap": 0.034957809124412376, "calib/mean_conf": 0.7414581673306773, "calib/mu_c": 0.7584496124031008, "calib/mu_w": 0.7234918032786885, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.25584063745019925, "calib/std_conf": 0.25027556375279786, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2354.0, "completions/max_terminated_length": 2354.0, "completions/mean_length": 574.703125, "completions/mean_terminated_length": 581.517822265625, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 0.1888, "grad_norm": 0.08498404920101166, "learning_rate": 6.388888888888889e-07, "loss": -0.0273, "num_tokens": 41075980.0, "reward": 1.1542657613754272, "reward_std": 0.3269786536693573, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6405003070831299, "rewards/format_reward_step": 0.98046875, "step": 177 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.998680712131609e-07, "aux_brier/mean_group_std": 0.11104178135653618, "aux_brier/mean_r": 0.7157304284767535, "aux_brier/n_active_tok": 295.875, "aux_brier/n_groups": 14.1875, "aux_brier/n_step_records": 73.96875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5460354720918101, "calib/avg_num_step_conf": 9.51953125, "calib/ece": 0.2464000000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.344, "calib/gap": 0.05515779864371417, "calib/mean_conf": 0.7792, "calib/mu_c": 0.8030281690140845, "calib/mu_w": 0.7478703703703703, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2288000000000001, "calib/std_conf": 0.2233494123565137, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2644.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 544.80078125, "completions/mean_terminated_length": 546.9373168945312, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.18986666666666666, "grad_norm": 0.1378621906042099, "learning_rate": 6.111111111111112e-07, "loss": 0.0093, "num_tokens": 41321521.0, "reward": 1.2118072509765625, "reward_std": 0.37717777490615845, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6675414443016052, "rewards/format_reward_step": 0.97265625, "step": 178 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 2.420083540788376e-07, "aux_brier/mean_group_std": 0.11303950486766368, "aux_brier/mean_r": 0.7173948105286239, "aux_brier/n_active_tok": 301.25, "aux_brier/n_groups": 15.25, "aux_brier/n_step_records": 75.3125, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5191171477079796, "calib/avg_num_step_conf": 10.10546875, "calib/ece": 0.21400000000000002, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.36, "calib/gap": 0.029881154499151186, "calib/mean_conf": 0.792, "calib/mu_c": 0.8033548387096775, "calib/mu_w": 0.7734736842105263, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.19300000000000003, "calib/std_conf": 0.19980390386576533, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2502.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 581.30078125, "completions/mean_terminated_length": 588.1937255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 241.0, "epoch": 0.19093333333333334, "grad_norm": 0.22822193801403046, "learning_rate": 5.833333333333334e-07, "loss": 0.0258, "num_tokens": 41576598.0, "reward": 1.2668393850326538, "reward_std": 0.36677446961402893, "rewards/accuracy_reward_step": 0.60546875, "rewards/final_brier_reward_step": 0.6923577785491943, "rewards/format_reward_step": 0.9765625, "step": 179 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.296333832587429e-07, "aux_brier/mean_group_std": 0.1467650592734881, "aux_brier/mean_r": 0.6713683648774496, "aux_brier/n_active_tok": 350.0, "aux_brier/n_groups": 17.0625, "aux_brier/n_step_records": 87.5, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.5514319888364675, "calib/avg_num_step_conf": 11.74609375, "calib/ece": 0.295242, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.97265625, "calib/frac_conf_gt_0.9": 0.556, "calib/gap": 0.05286965911356256, "calib/mean_conf": 0.848842, "calib/mu_c": 0.8702013422818794, "calib/mu_w": 0.8173316831683168, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.274042, "calib/std_conf": 0.19888212598421207, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2046.0, "completions/max_terminated_length": 2046.0, "completions/mean_length": 661.2421875, "completions/mean_terminated_length": 669.0830078125, "completions/min_length": 0.0, "completions/min_terminated_length": 228.0, "epoch": 0.192, "grad_norm": 0.22756622731685638, "learning_rate": 5.555555555555555e-07, "loss": 0.0056, "num_tokens": 41849732.0, "reward": 1.234571099281311, "reward_std": 0.35712069272994995, "rewards/accuracy_reward_step": 0.58203125, "rewards/final_brier_reward_step": 0.6648468375205994, "rewards/format_reward_step": 0.97265625, "step": 180 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -5.952673761133731e-07, "aux_brier/mean_group_std": 0.12988499873953285, "aux_brier/mean_r": 0.7105410062150648, "aux_brier/n_active_tok": 314.75, "aux_brier/n_groups": 16.65625, "aux_brier/n_step_records": 78.6875, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5145697522816167, "calib/avg_num_step_conf": 10.21484375, "calib/ece": 0.34563306451612896, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.4475806451612903, "calib/gap": 0.019005867014341704, "calib/mean_conf": 0.7973508064516129, "calib/mu_c": 0.8073135593220341, "calib/mu_w": 0.7883076923076924, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3335887096774193, "calib/std_conf": 0.21528795869866935, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2648.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 592.8203125, "completions/mean_terminated_length": 595.1451416015625, "completions/min_length": 0.0, "completions/min_terminated_length": 196.0, "epoch": 0.19306666666666666, "grad_norm": 0.20192381739616394, "learning_rate": 5.277777777777779e-07, "loss": 0.0639, "num_tokens": 42107758.0, "reward": 1.0910775661468506, "reward_std": 0.3487577736377716, "rewards/accuracy_reward_step": 0.4609375, "rewards/final_brier_reward_step": 0.5908727645874023, "rewards/format_reward_step": 0.96484375, "step": 181 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 4.255648992923522e-07, "aux_brier/mean_group_std": 0.11135627675651893, "aux_brier/mean_r": 0.6907847942857936, "aux_brier/n_active_tok": 327.0, "aux_brier/n_groups": 16.5, "aux_brier/n_step_records": 81.75, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.5520652898067956, "calib/avg_num_step_conf": 10.828125, "calib/ece": 0.23513833992094874, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.43478260869565216, "calib/gap": 0.054120586275816396, "calib/mean_conf": 0.8169565217391305, "calib/mu_c": 0.8372784810126582, "calib/mu_w": 0.7831578947368418, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2137944664031622, "calib/std_conf": 0.19068792170752427, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2737.0, "completions/max_terminated_length": 2737.0, "completions/mean_length": 597.72265625, "completions/mean_terminated_length": 600.0667114257812, "completions/min_length": 0.0, "completions/min_terminated_length": 202.0, "epoch": 0.19413333333333332, "grad_norm": 0.02944478951394558, "learning_rate": 5.000000000000001e-07, "loss": 0.0262, "num_tokens": 42366935.0, "reward": 1.2856700420379639, "reward_std": 0.2591235935688019, "rewards/accuracy_reward_step": 0.6171875, "rewards/final_brier_reward_step": 0.7051804661750793, "rewards/format_reward_step": 0.984375, "step": 182 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.6183354104720715e-07, "aux_brier/mean_group_std": 0.11808640196865099, "aux_brier/mean_r": 0.7135023719089151, "aux_brier/n_active_tok": 314.875, "aux_brier/n_groups": 15.65625, "aux_brier/n_step_records": 78.71875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.4999037783052152, "calib/avg_num_step_conf": 10.7265625, "calib/ece": 0.32677800000000007, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.4, "calib/gap": -0.0005118673423568643, "calib/mean_conf": 0.790778, "calib/mu_c": 0.7905343511450382, "calib/mu_w": 0.7910462184873951, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29677800000000004, "calib/std_conf": 0.21987768353336815, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2706.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 634.01953125, "completions/mean_terminated_length": 639.0117797851562, "completions/min_length": 0.0, "completions/min_terminated_length": 141.0, "epoch": 0.1952, "grad_norm": 0.25354498624801636, "learning_rate": 4.7222222222222226e-07, "loss": 0.013, "num_tokens": 42635924.0, "reward": 1.1540048122406006, "reward_std": 0.4529300630092621, "rewards/accuracy_reward_step": 0.51171875, "rewards/final_brier_reward_step": 0.6160194873809814, "rewards/format_reward_step": 0.9765625, "step": 183 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.2697582330598607e-07, "aux_brier/mean_group_std": 0.14336063705529795, "aux_brier/mean_r": 0.7043994428919576, "aux_brier/n_active_tok": 321.375, "aux_brier/n_groups": 15.09375, "aux_brier/n_step_records": 80.34375, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.44517338331771317, "calib/avg_num_step_conf": 10.6328125, "calib/ece": 0.26717131474103595, "calib/final_conf_rate": 0.98046875, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.44223107569721115, "calib/gap": -0.023902798232695033, "calib/mean_conf": 0.807808764940239, "calib/mu_c": 0.7985714285714287, "calib/mu_w": 0.8224742268041237, "calib/nonempty_final_conf_rate": 0.98046875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23071713147410372, "calib/std_conf": 0.20463060510268194, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2129.0, "completions/max_terminated_length": 2129.0, "completions/mean_length": 592.03515625, "completions/mean_terminated_length": 599.0553588867188, "completions/min_length": 0.0, "completions/min_terminated_length": 242.0, "epoch": 0.19626666666666667, "grad_norm": 0.14763610064983368, "learning_rate": 4.444444444444445e-07, "loss": -0.0166, "num_tokens": 42892765.0, "reward": 1.256502389907837, "reward_std": 0.35554298758506775, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6588218212127686, "rewards/format_reward_step": 0.98046875, "step": 184 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 6.748861667849049e-07, "aux_brier/mean_group_std": 0.1525347031811119, "aux_brier/mean_r": 0.7060887684059763, "aux_brier/n_active_tok": 333.25, "aux_brier/n_groups": 19.625, "aux_brier/n_step_records": 83.3125, "calib/answer_extract_rate": 0.95703125, "calib/auroc": 0.6198409433703551, "calib/avg_num_step_conf": 11.21484375, "calib/ece": 0.22514285714285714, "calib/final_conf_rate": 0.95703125, "calib/format_rate": 0.953125, "calib/frac_conf_gt_0.9": 0.4, "calib/gap": 0.09866790072672404, "calib/mean_conf": 0.7900408163265307, "calib/mu_c": 0.831118881118881, "calib/mu_w": 0.732450980392157, "calib/nonempty_final_conf_rate": 0.95703125, "calib/nonempty_reasoning_rate": 0.9921875, "calib/nonempty_step_conf_rate": 0.98828125, "calib/pce": 0.21575510204081633, "calib/std_conf": 0.22385764054860102, "calib/step_conf_rate": 0.98828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2785.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 601.6015625, "completions/mean_terminated_length": 611.1508178710938, "completions/min_length": 0.0, "completions/min_terminated_length": 291.0, "epoch": 0.19733333333333333, "grad_norm": 0.09341681748628616, "learning_rate": 4.1666666666666667e-07, "loss": 0.0095, "num_tokens": 43153695.0, "reward": 1.205216884613037, "reward_std": 0.27410241961479187, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6802429556846619, "rewards/format_reward_step": 0.953125, "step": 185 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.3564172646196582e-07, "aux_brier/mean_group_std": 0.12399026065331648, "aux_brier/mean_r": 0.705690339450971, "aux_brier/n_active_tok": 325.125, "aux_brier/n_groups": 16.125, "aux_brier/n_step_records": 81.28125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5575894295842733, "calib/avg_num_step_conf": 10.6328125, "calib/ece": 0.2943650793650794, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.3611111111111111, "calib/gap": 0.02039187882694171, "calib/mean_conf": 0.7902380952380952, "calib/mu_c": 0.798896551724138, "calib/mu_w": 0.7785046728971963, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2546031746031746, "calib/std_conf": 0.23210378181511784, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2262.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 603.35546875, "completions/mean_terminated_length": 605.7216186523438, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.1984, "grad_norm": 0.2975756525993347, "learning_rate": 3.8888888888888895e-07, "loss": 0.0206, "num_tokens": 43413194.0, "reward": 1.2223985195159912, "reward_std": 0.31535643339157104, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.655219554901123, "rewards/format_reward_step": 0.984375, "step": 186 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -8.55057671056425e-08, "aux_brier/mean_group_std": 0.11910087325314367, "aux_brier/mean_r": 0.6735562742804757, "aux_brier/n_active_tok": 347.625, "aux_brier/n_groups": 21.15625, "aux_brier/n_step_records": 86.90625, "calib/answer_extract_rate": 0.96484375, "calib/auroc": 0.49827265479670474, "calib/avg_num_step_conf": 12.45703125, "calib/ece": 0.29754032258064517, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96484375, "calib/frac_conf_gt_0.9": 0.4435483870967742, "calib/gap": 0.012326601116130709, "calib/mean_conf": 0.811491935483871, "calib/mu_c": 0.8167605633802818, "calib/mu_w": 0.8044339622641511, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26822580645161287, "calib/std_conf": 0.22131159598089012, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3011.0, "completions/max_terminated_length": 3011.0, "completions/mean_length": 657.10546875, "completions/mean_terminated_length": 664.8972778320312, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 0.19946666666666665, "grad_norm": 0.6251574158668518, "learning_rate": 3.611111111111111e-07, "loss": 0.0182, "num_tokens": 43682957.0, "reward": 1.1957060098648071, "reward_std": 0.38417935371398926, "rewards/accuracy_reward_step": 0.5546875, "rewards/final_brier_reward_step": 0.6343867182731628, "rewards/format_reward_step": 0.96484375, "step": 187 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -1.0944355566949726e-07, "aux_brier/mean_group_std": 0.12051941412352694, "aux_brier/mean_r": 0.694630832225542, "aux_brier/n_active_tok": 315.75, "aux_brier/n_groups": 14.65625, "aux_brier/n_step_records": 78.9375, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5367207792207792, "calib/avg_num_step_conf": 10.51953125, "calib/ece": 0.2666535433070867, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4566929133858268, "calib/gap": 0.03677532467532485, "calib/mean_conf": 0.7801968503937008, "calib/mu_c": 0.7946753246753246, "calib/mu_w": 0.7578999999999998, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.22027559055118115, "calib/std_conf": 0.24276856349855278, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2134.0, "completions/max_terminated_length": 2134.0, "completions/mean_length": 623.03125, "completions/mean_terminated_length": 625.4745483398438, "completions/min_length": 0.0, "completions/min_terminated_length": 190.0, "epoch": 0.20053333333333334, "grad_norm": 0.048046354204416275, "learning_rate": 3.3333333333333335e-07, "loss": 0.0027, "num_tokens": 43946525.0, "reward": 1.2687289714813232, "reward_std": 0.28813987970352173, "rewards/accuracy_reward_step": 0.6015625, "rewards/final_brier_reward_step": 0.6842910051345825, "rewards/format_reward_step": 0.9921875, "step": 188 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.903525094665742e-07, "aux_brier/mean_group_std": 0.13133251916872882, "aux_brier/mean_r": 0.6917744470619167, "aux_brier/n_active_tok": 318.0, "aux_brier/n_groups": 15.9375, "aux_brier/n_step_records": 79.5, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4858169682967527, "calib/avg_num_step_conf": 10.24609375, "calib/ece": 0.28181818181818186, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4308300395256917, "calib/gap": 0.0025863175458863408, "calib/mean_conf": 0.8273517786561264, "calib/mu_c": 0.82843537414966, "calib/mu_w": 0.8258490566037736, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26407114624505934, "calib/std_conf": 0.18891332361882235, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 1537.0, "completions/max_terminated_length": 1537.0, "completions/mean_length": 579.78515625, "completions/mean_terminated_length": 582.058837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.2016, "grad_norm": 0.08419397473335266, "learning_rate": 3.055555555555556e-07, "loss": 0.0249, "num_tokens": 44202718.0, "reward": 1.2317867279052734, "reward_std": 0.2693943381309509, "rewards/accuracy_reward_step": 0.57421875, "rewards/final_brier_reward_step": 0.653709352016449, "rewards/format_reward_step": 0.98828125, "step": 189 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.261895272580693e-07, "aux_brier/mean_group_std": 0.1180641851584547, "aux_brier/mean_r": 0.6871991138607908, "aux_brier/n_active_tok": 318.125, "aux_brier/n_groups": 15.40625, "aux_brier/n_step_records": 79.53125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.6120611770779556, "calib/avg_num_step_conf": 10.78515625, "calib/ece": 0.2400790513833994, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.42292490118577075, "calib/gap": 0.08553497676819855, "calib/mean_conf": 0.7960474308300395, "calib/mu_c": 0.8312080536912753, "calib/mu_w": 0.7456730769230767, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.22359683794466423, "calib/std_conf": 0.22699138454885742, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2934.0, "completions/max_terminated_length": 2934.0, "completions/mean_length": 634.890625, "completions/mean_terminated_length": 634.890625, "completions/min_length": 216.0, "completions/min_terminated_length": 216.0, "epoch": 0.20266666666666666, "grad_norm": 0.03616810962557793, "learning_rate": 2.7777777777777776e-07, "loss": 0.0193, "num_tokens": 44470858.0, "reward": 1.2515528202056885, "reward_std": 0.2907295227050781, "rewards/accuracy_reward_step": 0.5859375, "rewards/final_brier_reward_step": 0.6937117576599121, "rewards/format_reward_step": 0.984375, "step": 190 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 3.8819080244723025e-07, "aux_brier/mean_group_std": 0.12084470755713787, "aux_brier/mean_r": 0.6825326358416054, "aux_brier/n_active_tok": 334.875, "aux_brier/n_groups": 16.09375, "aux_brier/n_step_records": 83.71875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.601985733564681, "calib/avg_num_step_conf": 11.265625, "calib/ece": 0.38772, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.46, "calib/gap": 0.0589582931688194, "calib/mean_conf": 0.8105999999999999, "calib/mu_c": 0.841965811965812, "calib/mu_w": 0.7830075187969926, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.36516000000000004, "calib/std_conf": 0.22669636080007988, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125, "completions/max_length": 2798.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 570.5703125, "completions/mean_terminated_length": 581.936279296875, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.20373333333333332, "grad_norm": 0.05756014212965965, "learning_rate": 2.5000000000000004e-07, "loss": -0.0469, "num_tokens": 44721092.0, "reward": 1.094632863998413, "reward_std": 0.2197417914867401, "rewards/accuracy_reward_step": 0.45703125, "rewards/final_brier_reward_step": 0.5972816348075867, "rewards/format_reward_step": 0.9765625, "step": 191 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -4.017105327686066e-07, "aux_brier/mean_group_std": 0.13229735907475035, "aux_brier/mean_r": 0.7189988864627267, "aux_brier/n_active_tok": 315.125, "aux_brier/n_groups": 17.78125, "aux_brier/n_step_records": 78.78125, "calib/answer_extract_rate": 0.96875, "calib/auroc": 0.5310182733050848, "calib/avg_num_step_conf": 10.8203125, "calib/ece": 0.3099934959349594, "calib/final_conf_rate": 0.9609375, "calib/format_rate": 0.9609375, "calib/frac_conf_gt_0.9": 0.37398373983739835, "calib/gap": 0.033709533898305, "calib/mean_conf": 0.7682178861788618, "calib/mu_c": 0.7843874999999999, "calib/mu_w": 0.7506779661016949, "calib/nonempty_final_conf_rate": 0.9609375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.27894308943089435, "calib/std_conf": 0.23591018410538667, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 2129.0, "completions/max_terminated_length": 2129.0, "completions/mean_length": 620.95703125, "completions/mean_terminated_length": 630.8135375976562, "completions/min_length": 0.0, "completions/min_terminated_length": 180.0, "epoch": 0.2048, "grad_norm": 0.16255465149879456, "learning_rate": 2.2222222222222224e-07, "loss": -0.0091, "num_tokens": 44985033.0, "reward": 1.1405599117279053, "reward_std": 0.36320745944976807, "rewards/accuracy_reward_step": 0.50390625, "rewards/final_brier_reward_step": 0.6247397065162659, "rewards/format_reward_step": 0.9609375, "step": 192 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.73468718795467e-08, "aux_brier/mean_group_std": 0.12834107534476466, "aux_brier/mean_r": 0.6905276728515516, "aux_brier/n_active_tok": 341.875, "aux_brier/n_groups": 20.4375, "aux_brier/n_step_records": 85.46875, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.47207613323315806, "calib/avg_num_step_conf": 11.35546875, "calib/ece": 0.30897233201581037, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.41106719367588934, "calib/gap": 0.010392561983471271, "calib/mean_conf": 0.7943478260869566, "calib/mu_c": 0.7993181818181819, "calib/mu_w": 0.7889256198347107, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2907905138339922, "calib/std_conf": 0.21251196098682956, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 2298.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 603.78515625, "completions/mean_terminated_length": 608.5393676757812, "completions/min_length": 0.0, "completions/min_terminated_length": 232.0, "epoch": 0.20586666666666667, "grad_norm": 0.061190713196992874, "learning_rate": 1.9444444444444447e-07, "loss": -0.0169, "num_tokens": 45245314.0, "reward": 1.166947364807129, "reward_std": 0.4310789704322815, "rewards/accuracy_reward_step": 0.515625, "rewards/final_brier_reward_step": 0.6287269592285156, "rewards/format_reward_step": 0.98828125, "step": 193 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.467537613653036e-08, "aux_brier/mean_group_std": 0.10921226174267654, "aux_brier/mean_r": 0.7200921041378893, "aux_brier/n_active_tok": 300.0, "aux_brier/n_groups": 14.84375, "aux_brier/n_step_records": 75.0, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5728968405024742, "calib/avg_num_step_conf": 9.484375, "calib/ece": 0.2767193675889329, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.45849802371541504, "calib/gap": 0.061031594975256676, "calib/mean_conf": 0.8053359683794467, "calib/mu_c": 0.8321126760563381, "calib/mu_w": 0.7710810810810814, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.26039525691699617, "calib/std_conf": 0.2147219586728735, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 1979.0, "completions/max_terminated_length": 1979.0, "completions/mean_length": 543.35546875, "completions/mean_terminated_length": 543.35546875, "completions/min_length": 196.0, "completions/min_terminated_length": 196.0, "epoch": 0.20693333333333333, "grad_norm": 0.032329004257917404, "learning_rate": 1.6666666666666668e-07, "loss": 0.022, "num_tokens": 45490357.0, "reward": 1.2202813625335693, "reward_std": 0.3027583658695221, "rewards/accuracy_reward_step": 0.55859375, "rewards/final_brier_reward_step": 0.6701878905296326, "rewards/format_reward_step": 0.98828125, "step": 194 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.4904705261664652e-07, "aux_brier/mean_group_std": 0.1341669397120112, "aux_brier/mean_r": 0.7263006015498974, "aux_brier/n_active_tok": 307.0, "aux_brier/n_groups": 15.25, "aux_brier/n_step_records": 76.75, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.539510151466323, "calib/avg_num_step_conf": 10.0390625, "calib/ece": 0.2530158730158731, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.984375, "calib/frac_conf_gt_0.9": 0.38095238095238093, "calib/gap": 0.043546245568804265, "calib/mean_conf": 0.7946825396825397, "calib/mu_c": 0.8131724137931035, "calib/mu_w": 0.7696261682242992, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.23615079365079372, "calib/std_conf": 0.20581886977499117, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2747.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 564.10546875, "completions/mean_terminated_length": 570.7944946289062, "completions/min_length": 0.0, "completions/min_terminated_length": 188.0, "epoch": 0.208, "grad_norm": 0.07772547006607056, "learning_rate": 1.3888888888888888e-07, "loss": -0.0248, "num_tokens": 45740752.0, "reward": 1.2275408506393433, "reward_std": 0.29382142424583435, "rewards/accuracy_reward_step": 0.56640625, "rewards/final_brier_reward_step": 0.6757882833480835, "rewards/format_reward_step": 0.984375, "step": 195 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 7.826108038705426e-08, "aux_brier/mean_group_std": 0.10924094080593996, "aux_brier/mean_r": 0.6989476227265496, "aux_brier/n_active_tok": 300.125, "aux_brier/n_groups": 12.78125, "aux_brier/n_step_records": 75.03125, "calib/answer_extract_rate": 0.9921875, "calib/auroc": 0.5354515474378487, "calib/avg_num_step_conf": 9.59375, "calib/ece": 0.2893307086614173, "calib/final_conf_rate": 0.9921875, "calib/format_rate": 0.9921875, "calib/frac_conf_gt_0.9": 0.4921259842519685, "calib/gap": 0.026340690005073664, "calib/mean_conf": 0.8389370078740156, "calib/mu_c": 0.8501369863013699, "calib/mu_w": 0.8237962962962962, "calib/nonempty_final_conf_rate": 0.9921875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.2767322834645669, "calib/std_conf": 0.18039123047418926, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 1552.0, "completions/max_terminated_length": 1552.0, "completions/mean_length": 518.61328125, "completions/mean_terminated_length": 522.6968383789062, "completions/min_length": 0.0, "completions/min_terminated_length": 220.0, "epoch": 0.20906666666666668, "grad_norm": 0.09711069613695145, "learning_rate": 1.1111111111111112e-07, "loss": -0.0158, "num_tokens": 45976061.0, "reward": 1.231645941734314, "reward_std": 0.18754267692565918, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.660959005355835, "rewards/format_reward_step": 0.9921875, "step": 196 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -7.674647184152672e-07, "aux_brier/mean_group_std": 0.13293358674493327, "aux_brier/mean_r": 0.7153765468896983, "aux_brier/n_active_tok": 319.25, "aux_brier/n_groups": 14.71875, "aux_brier/n_step_records": 79.8125, "calib/answer_extract_rate": 0.98828125, "calib/auroc": 0.4699100112485939, "calib/avg_num_step_conf": 10.6875, "calib/ece": 0.36051185770750993, "calib/final_conf_rate": 0.98828125, "calib/format_rate": 0.98828125, "calib/frac_conf_gt_0.9": 0.4782608695652174, "calib/gap": 0.0002670603674539418, "calib/mean_conf": 0.8032826086956522, "calib/mu_c": 0.8034166666666666, "calib/mu_w": 0.8031496062992126, "calib/nonempty_final_conf_rate": 0.98828125, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.3328853754940712, "calib/std_conf": 0.22820994462988967, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 1756.0, "completions/max_terminated_length": 1756.0, "completions/mean_length": 560.8359375, "completions/mean_terminated_length": 567.4862060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 273.0, "epoch": 0.21013333333333334, "grad_norm": 0.05755879357457161, "learning_rate": 8.333333333333334e-08, "loss": -0.0315, "num_tokens": 46224691.0, "reward": 1.1357746124267578, "reward_std": 0.3277100920677185, "rewards/accuracy_reward_step": 0.4921875, "rewards/final_brier_reward_step": 0.5977863073348999, "rewards/format_reward_step": 0.98828125, "step": 197 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": -3.4224928335824245e-08, "aux_brier/mean_group_std": 0.1235597167482547, "aux_brier/mean_r": 0.7128413021535491, "aux_brier/n_active_tok": 307.25, "aux_brier/n_groups": 15.3125, "aux_brier/n_step_records": 76.8125, "calib/answer_extract_rate": 0.984375, "calib/auroc": 0.5242310674592918, "calib/avg_num_step_conf": 9.8671875, "calib/ece": 0.3136111111111111, "calib/final_conf_rate": 0.984375, "calib/format_rate": 0.98046875, "calib/frac_conf_gt_0.9": 0.36904761904761907, "calib/gap": -0.0028547428276040065, "calib/mean_conf": 0.7955158730158729, "calib/mu_c": 0.7943150684931507, "calib/mu_w": 0.7971698113207547, "calib/nonempty_final_conf_rate": 0.984375, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 0.99609375, "calib/pce": 0.2648809523809524, "calib/std_conf": 0.20008165928682217, "calib/step_conf_rate": 0.99609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 2961.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 545.32421875, "completions/mean_terminated_length": 547.4627685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 189.0, "epoch": 0.2112, "grad_norm": 0.09032348543405533, "learning_rate": 5.555555555555556e-08, "loss": 0.0202, "num_tokens": 46469678.0, "reward": 1.2239996194839478, "reward_std": 0.3071012496948242, "rewards/accuracy_reward_step": 0.5703125, "rewards/final_brier_reward_step": 0.653810977935791, "rewards/format_reward_step": 0.98046875, "step": 198 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 9.611757822991507e-08, "aux_brier/mean_group_std": 0.13088208167619045, "aux_brier/mean_r": 0.6919954997381719, "aux_brier/n_active_tok": 319.75, "aux_brier/n_groups": 15.03125, "aux_brier/n_step_records": 79.9375, "calib/answer_extract_rate": 0.97265625, "calib/auroc": 0.42309462747418947, "calib/avg_num_step_conf": 10.859375, "calib/ece": 0.3160887096774196, "calib/final_conf_rate": 0.96875, "calib/format_rate": 0.96875, "calib/frac_conf_gt_0.9": 0.4717741935483871, "calib/gap": -0.022996646281317812, "calib/mean_conf": 0.8327016129032258, "calib/mu_c": 0.8224087591240876, "calib/mu_w": 0.8454054054054054, "calib/nonempty_final_conf_rate": 0.96875, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.29818548387096794, "calib/std_conf": 0.18949944590261614, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2792.0, "completions/max_terminated_length": 2792.0, "completions/mean_length": 635.921875, "completions/mean_terminated_length": 643.4624633789062, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.21226666666666666, "grad_norm": 0.02755553461611271, "learning_rate": 2.777777777777778e-08, "loss": 0.0438, "num_tokens": 46736674.0, "reward": 1.1713604927062988, "reward_std": 0.42046260833740234, "rewards/accuracy_reward_step": 0.53515625, "rewards/final_brier_reward_step": 0.607316792011261, "rewards/format_reward_step": 0.96875, "step": 199 }, { "aux_brier/lambda": 0.050000000000000024, "aux_brier/loss": 1.1839494104548542e-07, "aux_brier/mean_group_std": 0.13298443257382436, "aux_brier/mean_r": 0.6973319277544408, "aux_brier/n_active_tok": 330.75, "aux_brier/n_groups": 17.6875, "aux_brier/n_step_records": 82.6875, "calib/answer_extract_rate": 0.9765625, "calib/auroc": 0.5944006468566808, "calib/avg_num_step_conf": 10.98046875, "calib/ece": 0.2155200000000001, "calib/final_conf_rate": 0.9765625, "calib/format_rate": 0.9765625, "calib/frac_conf_gt_0.9": 0.412, "calib/gap": 0.08189475102755861, "calib/mean_conf": 0.7828, "calib/mu_c": 0.8145751633986927, "calib/mu_w": 0.7326804123711341, "calib/nonempty_final_conf_rate": 0.9765625, "calib/nonempty_reasoning_rate": 1.0, "calib/nonempty_step_conf_rate": 1.0, "calib/pce": 0.1931600000000001, "calib/std_conf": 0.23010467183436323, "calib/step_conf_rate": 1.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 2552.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 612.23046875, "completions/mean_terminated_length": 619.4901733398438, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 0.21333333333333335, "grad_norm": 0.0391717366874218, "learning_rate": 0.0, "loss": -0.0137, "num_tokens": 47001453.0, "reward": 1.2615517377853394, "reward_std": 0.26289767026901245, "rewards/accuracy_reward_step": 0.59765625, "rewards/final_brier_reward_step": 0.702457070350647, "rewards/format_reward_step": 0.9765625, "step": 200 }, { "epoch": 0.21333333333333335, "step": 200, "total_flos": 0.0, "train_loss": 0.02537305059027858, "train_runtime": 17053.7031, "train_samples_per_second": 3.002, "train_steps_per_second": 0.012 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 47001453, "num_train_epochs": 1, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }