Files
PureRL-1.5B-v12B-lam005/trainer_state.json
ModelHub XC af682a620f 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/PureRL-1.5B-v12B-lam005
Source: Original Platform
2026-06-04 17:08:31 +08:00

9839 lines
388 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"aux_brier/lambda": 0.05,
"aux_brier/loss": 5.791089203391117e-07,
"aux_brier/mean_group_std": 0.06289231620091193,
"aux_brier/mean_r": 0.4665906001184907,
"aux_brier/n_active_tok": 24.615384615384617,
"aux_brier/n_groups": 5.3076923076923075,
"aux_brier/n_step_records": 6.153846153846154,
"calib/answer_extract_rate": 0.08203125,
"calib/auroc": 0.6944444444444445,
"calib/avg_num_step_conf": 0.3359375,
"calib/ece": 0.6230769230769231,
"calib/final_conf_rate": 0.05078125,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.03861111111111115,
"calib/mean_conf": 0.9307692307692309,
"calib/mu_c": 0.9575,
"calib/mu_w": 0.9188888888888889,
"calib/nonempty_final_conf_rate": 0.05078125,
"calib/nonempty_reasoning_rate": 0.09765625,
"calib/nonempty_step_conf_rate": 0.0703125,
"calib/pce": 0.6230769230769231,
"calib/std_conf": 0.07965903671384378,
"calib/step_conf_rate": 0.0703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2955.0,
"completions/max_terminated_length": 2955.0,
"completions/mean_length": 613.67578125,
"completions/mean_terminated_length": 674.2532348632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.16193392872810364,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0318,
"num_tokens": 264685.0,
"reward": 0.04124843701720238,
"reward_std": 0.0838509351015091,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.01655624993145466,
"rewards/format_reward_step": 0.04296875,
"step": 1
},
{
"aux_brier/lambda": 0.05000000000000002,
"aux_brier/loss": 2.461345396504181e-08,
"aux_brier/mean_group_std": 0.046398653263787254,
"aux_brier/mean_r": 0.430243897442093,
"aux_brier/n_active_tok": 28.42105263157895,
"aux_brier/n_groups": 5.894736842105263,
"aux_brier/n_step_records": 7.105263157894737,
"calib/answer_extract_rate": 0.13671875,
"calib/auroc": 0.5338345864661654,
"calib/avg_num_step_conf": 0.55078125,
"calib/ece": 0.6261538461538463,
"calib/final_conf_rate": 0.1015625,
"calib/format_rate": 0.08984375,
"calib/frac_conf_gt_0.9": 0.7692307692307693,
"calib/gap": 0.002406015037593856,
"calib/mean_conf": 0.8953846153846153,
"calib/mu_c": 0.897142857142857,
"calib/mu_w": 0.8947368421052632,
"calib/nonempty_final_conf_rate": 0.1015625,
"calib/nonempty_reasoning_rate": 0.14453125,
"calib/nonempty_step_conf_rate": 0.109375,
"calib/pce": 0.6261538461538463,
"calib/std_conf": 0.18653172073466937,
"calib/step_conf_rate": 0.109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0546875,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 646.4609375,
"completions/mean_terminated_length": 683.8594970703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.006251324899494648,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0643,
"num_tokens": 533467.0,
"reward": 0.08358447253704071,
"reward_std": 0.15892045199871063,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.02965039201080799,
"rewards/format_reward_step": 0.08984375,
"step": 2
},
{
"aux_brier/lambda": 0.05,
"aux_brier/loss": 3.2406800544647144e-08,
"aux_brier/mean_group_std": 0.0121952197767889,
"aux_brier/mean_r": 0.4305657651894744,
"aux_brier/n_active_tok": 16.571428571428573,
"aux_brier/n_groups": 3.5,
"aux_brier/n_step_records": 4.142857142857143,
"calib/answer_extract_rate": 0.046875,
"calib/avg_num_step_conf": 0.234375,
"calib/ece": 0.9137500000000001,
"calib/final_conf_rate": 0.03125,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.875,
"calib/mean_conf": 0.9137500000000001,
"calib/mu_c": NaN,
"calib/mu_w": 0.9137500000000001,
"calib/nonempty_final_conf_rate": 0.03125,
"calib/nonempty_reasoning_rate": 0.08203125,
"calib/nonempty_step_conf_rate": 0.06640625,
"calib/pce": 0.9137500000000001,
"calib/std_conf": 0.12267207302397722,
"calib/step_conf_rate": 0.06640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11328125,
"completions/max_length": 2964.0,
"completions/max_terminated_length": 2964.0,
"completions/mean_length": 680.3984375,
"completions/mean_terminated_length": 767.321533203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.37062835693359375,
"learning_rate": 7.5e-07,
"loss": 0.0115,
"num_tokens": 812905.0,
"reward": 0.01679697260260582,
"reward_std": 0.039274316281080246,
"rewards/accuracy_reward_step": 0.0,
"rewards/final_brier_reward_step": 0.004687890410423279,
"rewards/format_reward_step": 0.03125,
"step": 3
},
{
"aux_brier/lambda": 0.049999999999999996,
"aux_brier/loss": -3.602478569421562e-09,
"aux_brier/mean_group_std": 0.009702237574707062,
"aux_brier/mean_r": 0.3583118615472482,
"aux_brier/n_active_tok": 21.714285714285715,
"aux_brier/n_groups": 4.714285714285714,
"aux_brier/n_step_records": 5.428571428571429,
"calib/answer_extract_rate": 0.05859375,
"calib/auroc": 0.4666666666666667,
"calib/avg_num_step_conf": 0.16015625,
"calib/ece": 0.6024999999999999,
"calib/final_conf_rate": 0.03125,
"calib/format_rate": 0.01953125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0013333333333334085,
"calib/mean_conf": 0.9774999999999999,
"calib/mu_c": 0.9766666666666666,
"calib/mu_w": 0.978,
"calib/nonempty_final_conf_rate": 0.03125,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.03125,
"calib/pce": 0.6024999999999999,
"calib/std_conf": 0.017139136501002624,
"calib/step_conf_rate": 0.03125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2833.0,
"completions/max_terminated_length": 2833.0,
"completions/mean_length": 699.56640625,
"completions/mean_terminated_length": 768.622314453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.0027342927642166615,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0228,
"num_tokens": 1098162.0,
"reward": 0.02265048772096634,
"reward_std": 0.05369918793439865,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.004664453212171793,
"rewards/format_reward_step": 0.01953125,
"step": 4
},
{
"aux_brier/lambda": 0.049999999999999996,
"aux_brier/loss": 0.0,
"aux_brier/mean_group_std": 0.0,
"aux_brier/mean_r": 0.6531497392606985,
"aux_brier/n_active_tok": 13.333333333333334,
"aux_brier/n_groups": 3.3333333333333335,
"aux_brier/n_step_records": 3.3333333333333335,
"calib/answer_extract_rate": 0.046875,
"calib/avg_num_step_conf": 0.078125,
"calib/ece": 0.9357142857142857,
"calib/final_conf_rate": 0.02734375,
"calib/format_rate": 0.01953125,
"calib/frac_conf_gt_0.9": 0.7142857142857143,
"calib/mean_conf": 0.9357142857142857,
"calib/mu_c": NaN,
"calib/mu_w": 0.9357142857142857,
"calib/nonempty_final_conf_rate": 0.02734375,
"calib/nonempty_reasoning_rate": 0.046875,
"calib/nonempty_step_conf_rate": 0.0234375,
"calib/pce": 0.9357142857142857,
"calib/std_conf": 0.046246897303538974,
"calib/step_conf_rate": 0.0234375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 2945.0,
"completions/max_terminated_length": 2945.0,
"completions/mean_length": 652.7890625,
"completions/mean_terminated_length": 723.437255859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.0021710810251533985,
"learning_rate": 1.25e-06,
"loss": 0.0131,
"num_tokens": 1371964.0,
"reward": 0.01028769463300705,
"reward_std": 0.02496844157576561,
"rewards/accuracy_reward_step": 0.0,
"rewards/final_brier_reward_step": 0.002088281325995922,
"rewards/format_reward_step": 0.01953125,
"step": 5
},
{
"aux_brier/lambda": 0.05,
"aux_brier/loss": -1.7029898761019728e-09,
"aux_brier/mean_group_std": 0.006566845763957197,
"aux_brier/mean_r": 0.48239234059468156,
"aux_brier/n_active_tok": 25.714285714285715,
"aux_brier/n_groups": 6.071428571428571,
"aux_brier/n_step_records": 6.428571428571429,
"calib/answer_extract_rate": 0.109375,
"calib/auroc": 0.8717948717948718,
"calib/avg_num_step_conf": 0.3828125,
"calib/ece": 0.706425,
"calib/final_conf_rate": 0.0625,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.875,
"calib/gap": 0.10593846153846154,
"calib/mean_conf": 0.893925,
"calib/mu_c": 0.98,
"calib/mu_w": 0.8740615384615384,
"calib/nonempty_final_conf_rate": 0.0625,
"calib/nonempty_reasoning_rate": 0.1171875,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.706425,
"calib/std_conf": 0.2299773562223029,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3018.0,
"completions/max_terminated_length": 3018.0,
"completions/mean_length": 601.28515625,
"completions/mean_terminated_length": 652.2415161132812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.0033739216160029173,
"learning_rate": 1.5e-06,
"loss": -0.0025,
"num_tokens": 1631845.0,
"reward": 0.03785628452897072,
"reward_std": 0.05977939814329147,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.018612641841173172,
"rewards/format_reward_step": 0.04296875,
"step": 6
},
{
"aux_brier/lambda": 0.05000000000000001,
"aux_brier/loss": -2.053478910722463e-09,
"aux_brier/mean_group_std": 0.019962047751666122,
"aux_brier/mean_r": 0.48358245428526425,
"aux_brier/n_active_tok": 19.80952380952381,
"aux_brier/n_groups": 4.523809523809524,
"aux_brier/n_step_records": 4.9523809523809526,
"calib/answer_extract_rate": 0.09765625,
"calib/auroc": 0.45999999999999996,
"calib/avg_num_step_conf": 0.4140625,
"calib/ece": 0.7154999999999998,
"calib/final_conf_rate": 0.078125,
"calib/format_rate": 0.0625,
"calib/frac_conf_gt_0.9": 0.9,
"calib/gap": -0.0019999999999997797,
"calib/mean_conf": 0.9654999999999999,
"calib/mu_c": 0.9640000000000001,
"calib/mu_w": 0.9659999999999999,
"calib/nonempty_final_conf_rate": 0.078125,
"calib/nonempty_reasoning_rate": 0.12890625,
"calib/nonempty_step_conf_rate": 0.09375,
"calib/pce": 0.7154999999999998,
"calib/std_conf": 0.03943031828428475,
"calib/step_conf_rate": 0.09375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2940.0,
"completions/max_terminated_length": 2940.0,
"completions/mean_length": 680.328125,
"completions/mean_terminated_length": 741.1233520507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.09639103710651398,
"learning_rate": 1.75e-06,
"loss": 0.0245,
"num_tokens": 1913433.0,
"reward": 0.055225878953933716,
"reward_std": 0.10832781344652176,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.017778515815734863,
"rewards/format_reward_step": 0.0625,
"step": 7
},
{
"aux_brier/lambda": 0.05000000000000001,
"aux_brier/loss": -1.7644980783160236e-06,
"aux_brier/mean_group_std": 0.010382280017897379,
"aux_brier/mean_r": 0.5579407203866258,
"aux_brier/n_active_tok": 24.75,
"aux_brier/n_groups": 5.125,
"aux_brier/n_step_records": 6.1875,
"calib/answer_extract_rate": 0.1015625,
"calib/auroc": 0.5654761904761905,
"calib/avg_num_step_conf": 0.38671875,
"calib/ece": 0.6050000000000001,
"calib/final_conf_rate": 0.078125,
"calib/format_rate": 0.05859375,
"calib/frac_conf_gt_0.9": 0.65,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.8099999999999998,
"calib/mu_c": 0.81,
"calib/mu_w": 0.8100000000000002,
"calib/nonempty_final_conf_rate": 0.078125,
"calib/nonempty_reasoning_rate": 0.1171875,
"calib/nonempty_step_conf_rate": 0.08203125,
"calib/pce": 0.5575000000000001,
"calib/std_conf": 0.27910571473905726,
"calib/step_conf_rate": 0.08203125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3065.0,
"completions/max_terminated_length": 3065.0,
"completions/mean_length": 652.69921875,
"completions/mean_terminated_length": 711.0255126953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.005197752732783556,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0113,
"num_tokens": 2187036.0,
"reward": 0.059211522340774536,
"reward_std": 0.130070760846138,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.025908593088388443,
"rewards/format_reward_step": 0.05859375,
"step": 8
},
{
"aux_brier/lambda": 0.05,
"aux_brier/loss": 5.528596944351158e-08,
"aux_brier/mean_group_std": 0.03271103883501513,
"aux_brier/mean_r": 0.49176046228937714,
"aux_brier/n_active_tok": 22.46153846153846,
"aux_brier/n_groups": 4.923076923076923,
"aux_brier/n_step_records": 5.615384615384615,
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.9090909090909091,
"calib/avg_num_step_conf": 0.28515625,
"calib/ece": 0.7175,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.5833333333333334,
"calib/gap": 0.2063636363636363,
"calib/mean_conf": 0.8008333333333333,
"calib/mu_c": 0.99,
"calib/mu_w": 0.7836363636363637,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.0859375,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.7175,
"calib/std_conf": 0.2886017536714256,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 2927.0,
"completions/max_terminated_length": 2927.0,
"completions/mean_length": 663.49609375,
"completions/mean_terminated_length": 735.3030395507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0096,
"grad_norm": 0.1932460218667984,
"learning_rate": 2.25e-06,
"loss": 0.0067,
"num_tokens": 2464427.0,
"reward": 0.0225673820823431,
"reward_std": 0.06383019685745239,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.01214453112334013,
"rewards/format_reward_step": 0.03125,
"step": 9
},
{
"aux_brier/lambda": 0.05000000000000001,
"aux_brier/loss": -6.086005588379351e-09,
"aux_brier/mean_group_std": 0.04457763181302451,
"aux_brier/mean_r": 0.4550933216206302,
"aux_brier/n_active_tok": 24.75,
"aux_brier/n_groups": 5.0,
"aux_brier/n_step_records": 6.1875,
"calib/answer_extract_rate": 0.08984375,
"calib/auroc": 0.8529411764705882,
"calib/avg_num_step_conf": 0.40234375,
"calib/ece": 0.8458333333333334,
"calib/final_conf_rate": 0.0703125,
"calib/format_rate": 0.06640625,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": 0.09382352941176475,
"calib/mean_conf": 0.901388888888889,
"calib/mu_c": 0.99,
"calib/mu_w": 0.8961764705882352,
"calib/nonempty_final_conf_rate": 0.0703125,
"calib/nonempty_reasoning_rate": 0.11328125,
"calib/nonempty_step_conf_rate": 0.08984375,
"calib/pce": 0.8458333333333334,
"calib/std_conf": 0.2220473270095787,
"calib/step_conf_rate": 0.08984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 2955.0,
"completions/max_terminated_length": 2955.0,
"completions/mean_length": 673.3046875,
"completions/mean_terminated_length": 727.2826538085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.07814536243677139,
"learning_rate": 2.5e-06,
"loss": 0.0165,
"num_tokens": 2743593.0,
"reward": 0.039519019424915314,
"reward_std": 0.08574040234088898,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.009638573974370956,
"rewards/format_reward_step": 0.06640625,
"step": 10
},
{
"aux_brier/lambda": 0.05000000000000001,
"aux_brier/loss": 2.290517282721978e-08,
"aux_brier/mean_group_std": 0.04705990808180376,
"aux_brier/mean_r": 0.43014223031838883,
"aux_brier/n_active_tok": 27.238095238095237,
"aux_brier/n_groups": 5.619047619047619,
"aux_brier/n_step_records": 6.809523809523809,
"calib/answer_extract_rate": 0.1171875,
"calib/auroc": 0.5625,
"calib/avg_num_step_conf": 0.60546875,
"calib/ece": 0.6345000000000001,
"calib/final_conf_rate": 0.0859375,
"calib/format_rate": 0.06640625,
"calib/frac_conf_gt_0.9": 0.8181818181818182,
"calib/gap": 0.07004166666666656,
"calib/mean_conf": 0.9072272727272727,
"calib/mu_c": 0.9581666666666666,
"calib/mu_w": 0.888125,
"calib/nonempty_final_conf_rate": 0.0859375,
"calib/nonempty_reasoning_rate": 0.15234375,
"calib/nonempty_step_conf_rate": 0.12109375,
"calib/pce": 0.6345000000000001,
"calib/std_conf": 0.20186087283936693,
"calib/step_conf_rate": 0.12109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2868.0,
"completions/max_terminated_length": 2868.0,
"completions/mean_length": 722.78515625,
"completions/mean_terminated_length": 787.3744506835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.027079828083515167,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0399,
"num_tokens": 3033106.0,
"reward": 0.06367333978414536,
"reward_std": 0.16068580746650696,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.028130855411291122,
"rewards/format_reward_step": 0.06640625,
"step": 11
},
{
"aux_brier/lambda": 0.05000000000000002,
"aux_brier/loss": 3.784316033872415e-08,
"aux_brier/mean_group_std": 0.050264136058727066,
"aux_brier/mean_r": 0.51797735672114,
"aux_brier/n_active_tok": 26.72,
"aux_brier/n_groups": 5.0,
"aux_brier/n_step_records": 6.68,
"calib/answer_extract_rate": 0.16015625,
"calib/auroc": 0.5133333333333333,
"calib/avg_num_step_conf": 0.66015625,
"calib/ece": 0.6474705882352942,
"calib/final_conf_rate": 0.1328125,
"calib/format_rate": 0.10546875,
"calib/frac_conf_gt_0.9": 0.7352941176470589,
"calib/gap": 0.0350666666666668,
"calib/mean_conf": 0.8868823529411765,
"calib/mu_c": 0.9126666666666667,
"calib/mu_w": 0.8775999999999999,
"calib/nonempty_final_conf_rate": 0.1328125,
"calib/nonempty_reasoning_rate": 0.1875,
"calib/nonempty_step_conf_rate": 0.14453125,
"calib/pce": 0.6348235294117648,
"calib/std_conf": 0.175223011634398,
"calib/step_conf_rate": 0.14453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05078125,
"completions/max_length": 3045.0,
"completions/max_terminated_length": 3045.0,
"completions/mean_length": 598.9453125,
"completions/mean_terminated_length": 630.9876098632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0128,
"grad_norm": 0.0846952348947525,
"learning_rate": 3e-06,
"loss": 0.0453,
"num_tokens": 3290612.0,
"reward": 0.09865903109312057,
"reward_std": 0.1780010312795639,
"rewards/accuracy_reward_step": 0.03515625,
"rewards/final_brier_reward_step": 0.043073609471321106,
"rewards/format_reward_step": 0.10546875,
"step": 12
},
{
"aux_brier/lambda": 0.05000000000000002,
"aux_brier/loss": 1.8118686498007533e-07,
"aux_brier/mean_group_std": 0.035596244655370236,
"aux_brier/mean_r": 0.43525204805498086,
"aux_brier/n_active_tok": 26.434782608695652,
"aux_brier/n_groups": 5.130434782608695,
"aux_brier/n_step_records": 6.608695652173913,
"calib/answer_extract_rate": 0.15625,
"calib/auroc": 0.40384615384615385,
"calib/avg_num_step_conf": 0.6015625,
"calib/ece": 0.7746896551724138,
"calib/final_conf_rate": 0.11328125,
"calib/format_rate": 0.09375,
"calib/frac_conf_gt_0.9": 0.6896551724137931,
"calib/gap": 0.020666666666666833,
"calib/mean_conf": 0.8781379310344828,
"calib/mu_c": 0.8966666666666668,
"calib/mu_w": 0.876,
"calib/nonempty_final_conf_rate": 0.11328125,
"calib/nonempty_reasoning_rate": 0.17578125,
"calib/nonempty_step_conf_rate": 0.125,
"calib/pce": 0.7746896551724138,
"calib/std_conf": 0.18809896957277586,
"calib/step_conf_rate": 0.125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 633.203125,
"completions/mean_terminated_length": 683.9661865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.008228050544857979,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0405,
"num_tokens": 3557304.0,
"reward": 0.07202839851379395,
"reward_std": 0.14977121353149414,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.022488608956336975,
"rewards/format_reward_step": 0.09375,
"step": 13
},
{
"aux_brier/lambda": 0.05000000000000002,
"aux_brier/loss": 6.9190393318145334e-09,
"aux_brier/mean_group_std": 0.07575889127849744,
"aux_brier/mean_r": 0.4560538181934339,
"aux_brier/n_active_tok": 31.652173913043477,
"aux_brier/n_groups": 5.391304347826087,
"aux_brier/n_step_records": 7.913043478260869,
"calib/answer_extract_rate": 0.1796875,
"calib/auroc": 0.46296296296296297,
"calib/avg_num_step_conf": 0.7265625,
"calib/ece": 0.5727500000000001,
"calib/final_conf_rate": 0.15625,
"calib/format_rate": 0.12890625,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": 0.07968660968660957,
"calib/mean_conf": 0.8977499999999999,
"calib/mu_c": 0.9515384615384613,
"calib/mu_w": 0.8718518518518518,
"calib/nonempty_final_conf_rate": 0.15625,
"calib/nonempty_reasoning_rate": 0.203125,
"calib/nonempty_step_conf_rate": 0.1640625,
"calib/pce": 0.5727500000000001,
"calib/std_conf": 0.2133833112031023,
"calib/step_conf_rate": 0.1640625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 2915.0,
"completions/max_terminated_length": 2915.0,
"completions/mean_length": 589.546875,
"completions/mean_terminated_length": 636.8101196289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.03263997659087181,
"learning_rate": 3.5e-06,
"loss": 0.0416,
"num_tokens": 3813628.0,
"reward": 0.12998661398887634,
"reward_std": 0.2102944701910019,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.05900898203253746,
"rewards/format_reward_step": 0.12890625,
"step": 14
},
{
"aux_brier/lambda": 0.05000000000000002,
"aux_brier/loss": 6.183022988048847e-08,
"aux_brier/mean_group_std": 0.07469891433623198,
"aux_brier/mean_r": 0.4410105300338308,
"aux_brier/n_active_tok": 30.333333333333332,
"aux_brier/n_groups": 5.166666666666667,
"aux_brier/n_step_records": 7.583333333333333,
"calib/answer_extract_rate": 0.22265625,
"calib/auroc": 0.3846153846153846,
"calib/avg_num_step_conf": 0.765625,
"calib/ece": 0.6639130434782607,
"calib/final_conf_rate": 0.1796875,
"calib/format_rate": 0.1328125,
"calib/frac_conf_gt_0.9": 0.8478260869565217,
"calib/gap": -0.012867132867132813,
"calib/mean_conf": 0.9400000000000001,
"calib/mu_c": 0.9307692307692308,
"calib/mu_w": 0.9436363636363636,
"calib/nonempty_final_conf_rate": 0.1796875,
"calib/nonempty_reasoning_rate": 0.2578125,
"calib/nonempty_step_conf_rate": 0.1796875,
"calib/pce": 0.6606521739130433,
"calib/std_conf": 0.07723481981143186,
"calib/step_conf_rate": 0.1796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 3062.0,
"completions/max_terminated_length": 3062.0,
"completions/mean_length": 606.9140625,
"completions/mean_terminated_length": 652.815185546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.016,
"grad_norm": 0.08959263563156128,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0954,
"num_tokens": 4076878.0,
"reward": 0.12888270616531372,
"reward_std": 0.24933215975761414,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.04678086191415787,
"rewards/format_reward_step": 0.1328125,
"step": 15
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.0848133385239172e-07,
"aux_brier/mean_group_std": 0.09843642749678756,
"aux_brier/mean_r": 0.44798128896741424,
"aux_brier/n_active_tok": 47.714285714285715,
"aux_brier/n_groups": 8.0,
"aux_brier/n_step_records": 11.928571428571429,
"calib/answer_extract_rate": 0.2421875,
"calib/auroc": 0.5825358851674641,
"calib/avg_num_step_conf": 1.31640625,
"calib/ece": 0.6845170068027212,
"calib/final_conf_rate": 0.19140625,
"calib/format_rate": 0.171875,
"calib/frac_conf_gt_0.9": 0.8367346938775511,
"calib/gap": 0.06927113237639537,
"calib/mean_conf": 0.9090068027210885,
"calib/mu_c": 0.9627272727272725,
"calib/mu_w": 0.8934561403508772,
"calib/nonempty_final_conf_rate": 0.19140625,
"calib/nonempty_reasoning_rate": 0.26953125,
"calib/nonempty_step_conf_rate": 0.21484375,
"calib/pce": 0.6845170068027212,
"calib/std_conf": 0.1927121205450507,
"calib/step_conf_rate": 0.21484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.046875,
"completions/max_length": 2984.0,
"completions/max_terminated_length": 2984.0,
"completions/mean_length": 639.68359375,
"completions/mean_terminated_length": 671.1434326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.10603635013103485,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0904,
"num_tokens": 4349485.0,
"reward": 0.14869771897792816,
"reward_std": 0.2691415250301361,
"rewards/accuracy_reward_step": 0.046875,
"rewards/final_brier_reward_step": 0.06354087591171265,
"rewards/format_reward_step": 0.171875,
"step": 16
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.634651545818656e-08,
"aux_brier/mean_group_std": 0.16041422243976916,
"aux_brier/mean_r": 0.4858352168629629,
"aux_brier/n_active_tok": 59.06666666666667,
"aux_brier/n_groups": 7.033333333333333,
"aux_brier/n_step_records": 14.766666666666667,
"calib/answer_extract_rate": 0.3203125,
"calib/auroc": 0.5146750524109014,
"calib/avg_num_step_conf": 1.73828125,
"calib/ece": 0.6458450704225353,
"calib/final_conf_rate": 0.27734375,
"calib/format_rate": 0.23828125,
"calib/frac_conf_gt_0.9": 0.7183098591549296,
"calib/gap": 0.03657232704402513,
"calib/mean_conf": 0.8993661971830985,
"calib/mu_c": 0.9266666666666666,
"calib/mu_w": 0.8900943396226415,
"calib/nonempty_final_conf_rate": 0.27734375,
"calib/nonempty_reasoning_rate": 0.375,
"calib/nonempty_step_conf_rate": 0.3203125,
"calib/pce": 0.6458450704225353,
"calib/std_conf": 0.16196837105918782,
"calib/step_conf_rate": 0.3203125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 2940.0,
"completions/max_terminated_length": 2940.0,
"completions/mean_length": 548.0078125,
"completions/mean_terminated_length": 589.4537963867188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.16882355511188507,
"learning_rate": 4.25e-06,
"loss": 0.0821,
"num_tokens": 4593303.0,
"reward": 0.2112351357936859,
"reward_std": 0.29052725434303284,
"rewards/accuracy_reward_step": 0.0703125,
"rewards/final_brier_reward_step": 0.08712802827358246,
"rewards/format_reward_step": 0.23828125,
"step": 17
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.5866668310020165e-09,
"aux_brier/mean_group_std": 0.10337834969695207,
"aux_brier/mean_r": 0.4454792474401508,
"aux_brier/n_active_tok": 41.42857142857143,
"aux_brier/n_groups": 5.714285714285714,
"aux_brier/n_step_records": 10.357142857142858,
"calib/answer_extract_rate": 0.28515625,
"calib/auroc": 0.4962121212121212,
"calib/avg_num_step_conf": 1.1484375,
"calib/ece": 0.5890161290322582,
"calib/final_conf_rate": 0.2421875,
"calib/format_rate": 0.19140625,
"calib/frac_conf_gt_0.9": 0.7096774193548387,
"calib/gap": 0.05194191919191926,
"calib/mean_conf": 0.8686935483870968,
"calib/mu_c": 0.9055555555555556,
"calib/mu_w": 0.8536136363636363,
"calib/nonempty_final_conf_rate": 0.2421875,
"calib/nonempty_reasoning_rate": 0.33984375,
"calib/nonempty_step_conf_rate": 0.265625,
"calib/pce": 0.5836935483870969,
"calib/std_conf": 0.22807947035427445,
"calib/step_conf_rate": 0.265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 2988.0,
"completions/max_terminated_length": 2988.0,
"completions/mean_length": 574.22265625,
"completions/mean_terminated_length": 609.9627075195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0192,
"grad_norm": 0.29395243525505066,
"learning_rate": 4.5e-06,
"loss": 0.0954,
"num_tokens": 4851024.0,
"reward": 0.1949663907289505,
"reward_std": 0.2701266407966614,
"rewards/accuracy_reward_step": 0.078125,
"rewards/final_brier_reward_step": 0.08455304056406021,
"rewards/format_reward_step": 0.19140625,
"step": 18
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 8.71914328237633e-09,
"aux_brier/mean_group_std": 0.18596475159403847,
"aux_brier/mean_r": 0.4801690742344864,
"aux_brier/n_active_tok": 89.0,
"aux_brier/n_groups": 8.46875,
"aux_brier/n_step_records": 22.25,
"calib/answer_extract_rate": 0.56640625,
"calib/auroc": 0.6119100294985251,
"calib/avg_num_step_conf": 2.87109375,
"calib/ece": 0.7139416058394159,
"calib/final_conf_rate": 0.53515625,
"calib/format_rate": 0.44921875,
"calib/frac_conf_gt_0.9": 0.7372262773722628,
"calib/gap": 0.07784660766961649,
"calib/mean_conf": 0.8891240875912408,
"calib/mu_c": 0.9533333333333333,
"calib/mu_w": 0.8754867256637168,
"calib/nonempty_final_conf_rate": 0.53515625,
"calib/nonempty_reasoning_rate": 0.66796875,
"calib/nonempty_step_conf_rate": 0.5703125,
"calib/pce": 0.7139416058394159,
"calib/std_conf": 0.1940820597482887,
"calib/step_conf_rate": 0.5703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2724.0,
"completions/max_terminated_length": 2724.0,
"completions/mean_length": 425.109375,
"completions/mean_terminated_length": 435.31201171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.08734527975320816,
"learning_rate": 4.75e-06,
"loss": 0.1709,
"num_tokens": 5064612.0,
"reward": 0.35721492767333984,
"reward_std": 0.41247040033340454,
"rewards/accuracy_reward_step": 0.09375,
"rewards/final_brier_reward_step": 0.15542227029800415,
"rewards/format_reward_step": 0.44921875,
"step": 19
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.3638003304737865e-08,
"aux_brier/mean_group_std": 0.19777696493153557,
"aux_brier/mean_r": 0.51686083569449,
"aux_brier/n_active_tok": 100.125,
"aux_brier/n_groups": 8.21875,
"aux_brier/n_step_records": 25.03125,
"calib/answer_extract_rate": 0.67578125,
"calib/auroc": 0.5950396825396825,
"calib/avg_num_step_conf": 3.234375,
"calib/ece": 0.6353620481927711,
"calib/final_conf_rate": 0.6484375,
"calib/format_rate": 0.515625,
"calib/frac_conf_gt_0.9": 0.6807228915662651,
"calib/gap": 0.07038412698412688,
"calib/mean_conf": 0.8763259036144578,
"calib/mu_c": 0.92975,
"calib/mu_w": 0.8593658730158731,
"calib/nonempty_final_conf_rate": 0.6484375,
"calib/nonempty_reasoning_rate": 0.78515625,
"calib/nonempty_step_conf_rate": 0.66796875,
"calib/pce": 0.6353620481927711,
"calib/std_conf": 0.2056600185856877,
"calib/step_conf_rate": 0.66796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2478.0,
"completions/max_terminated_length": 2478.0,
"completions/mean_length": 361.2421875,
"completions/mean_terminated_length": 366.9762268066406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.18377214670181274,
"learning_rate": 5e-06,
"loss": 0.085,
"num_tokens": 5261962.0,
"reward": 0.46865132451057434,
"reward_std": 0.5203608274459839,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.21835541725158691,
"rewards/format_reward_step": 0.515625,
"step": 20
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.4970630329153023e-08,
"aux_brier/mean_group_std": 0.22389224604257676,
"aux_brier/mean_r": 0.5237990710747721,
"aux_brier/n_active_tok": 132.125,
"aux_brier/n_groups": 10.59375,
"aux_brier/n_step_records": 33.03125,
"calib/answer_extract_rate": 0.796875,
"calib/auroc": 0.5736507936507937,
"calib/avg_num_step_conf": 4.2265625,
"calib/ece": 0.6526098958333334,
"calib/final_conf_rate": 0.75,
"calib/format_rate": 0.6640625,
"calib/frac_conf_gt_0.9": 0.7395833333333334,
"calib/gap": 0.07685742857142852,
"calib/mean_conf": 0.8713598958333334,
"calib/mu_c": 0.9314047619047618,
"calib/mu_w": 0.8545473333333333,
"calib/nonempty_final_conf_rate": 0.75,
"calib/nonempty_reasoning_rate": 0.87109375,
"calib/nonempty_step_conf_rate": 0.7734375,
"calib/pce": 0.6526098958333334,
"calib/std_conf": 0.22478363139637003,
"calib/step_conf_rate": 0.7734375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3045.0,
"completions/max_terminated_length": 3045.0,
"completions/mean_length": 367.05859375,
"completions/mean_terminated_length": 368.498046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 24.0,
"epoch": 0.0224,
"grad_norm": 0.19530241191387177,
"learning_rate": 4.9722222222222224e-06,
"loss": 0.0974,
"num_tokens": 5458889.0,
"reward": 0.563724935054779,
"reward_std": 0.5039910078048706,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.25489968061447144,
"rewards/format_reward_step": 0.6640625,
"step": 21
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.0222760971068112e-09,
"aux_brier/mean_group_std": 0.20883598233943224,
"aux_brier/mean_r": 0.498378047303362,
"aux_brier/n_active_tok": 126.875,
"aux_brier/n_groups": 8.25,
"aux_brier/n_step_records": 31.71875,
"calib/answer_extract_rate": 0.83984375,
"calib/auroc": 0.46816097809475293,
"calib/avg_num_step_conf": 4.12890625,
"calib/ece": 0.5995587962962964,
"calib/final_conf_rate": 0.84375,
"calib/format_rate": 0.74609375,
"calib/frac_conf_gt_0.9": 0.7083333333333334,
"calib/gap": 0.028774172185430325,
"calib/mean_conf": 0.9004847222222223,
"calib/mu_c": 0.9206,
"calib/mu_w": 0.8918258278145696,
"calib/nonempty_final_conf_rate": 0.84375,
"calib/nonempty_reasoning_rate": 0.91015625,
"calib/nonempty_step_conf_rate": 0.84765625,
"calib/pce": 0.5995587962962964,
"calib/std_conf": 0.16218625466350314,
"calib/step_conf_rate": 0.84765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1795.0,
"completions/max_terminated_length": 1795.0,
"completions/mean_length": 299.58984375,
"completions/mean_terminated_length": 303.1423034667969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.11125664412975311,
"learning_rate": 4.944444444444445e-06,
"loss": 0.067,
"num_tokens": 5637400.0,
"reward": 0.7065411806106567,
"reward_std": 0.551918625831604,
"rewards/accuracy_reward_step": 0.25390625,
"rewards/final_brier_reward_step": 0.3183521032333374,
"rewards/format_reward_step": 0.74609375,
"step": 22
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.4593973210463851e-08,
"aux_brier/mean_group_std": 0.21091686692543818,
"aux_brier/mean_r": 0.4781024369606529,
"aux_brier/n_active_tok": 128.25,
"aux_brier/n_groups": 7.5,
"aux_brier/n_step_records": 32.0625,
"calib/answer_extract_rate": 0.87890625,
"calib/auroc": 0.6023887300939159,
"calib/avg_num_step_conf": 4.0859375,
"calib/ece": 0.6349454545454545,
"calib/final_conf_rate": 0.859375,
"calib/format_rate": 0.8125,
"calib/frac_conf_gt_0.9": 0.6954545454545454,
"calib/gap": 0.014763576970192038,
"calib/mean_conf": 0.9009454545454545,
"calib/mu_c": 0.9115483870967743,
"calib/mu_w": 0.8967848101265823,
"calib/nonempty_final_conf_rate": 0.859375,
"calib/nonempty_reasoning_rate": 0.95703125,
"calib/nonempty_step_conf_rate": 0.9140625,
"calib/pce": 0.6270363636363635,
"calib/std_conf": 0.15172348271316333,
"calib/step_conf_rate": 0.9140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2580.0,
"completions/max_terminated_length": 2580.0,
"completions/mean_length": 292.29296875,
"completions/mean_terminated_length": 293.4392395019531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 29.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.2156190723180771,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.0804,
"num_tokens": 5816163.0,
"reward": 0.7280130386352539,
"reward_std": 0.4908643662929535,
"rewards/accuracy_reward_step": 0.2421875,
"rewards/final_brier_reward_step": 0.31830233335494995,
"rewards/format_reward_step": 0.8125,
"step": 23
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 8.842990707746967e-09,
"aux_brier/mean_group_std": 0.21221712414175256,
"aux_brier/mean_r": 0.5030739964811978,
"aux_brier/n_active_tok": 164.625,
"aux_brier/n_groups": 11.1875,
"aux_brier/n_step_records": 41.15625,
"calib/answer_extract_rate": 0.90234375,
"calib/auroc": 0.5535012285012284,
"calib/avg_num_step_conf": 5.18359375,
"calib/ece": 0.7131834061135371,
"calib/final_conf_rate": 0.89453125,
"calib/format_rate": 0.84765625,
"calib/frac_conf_gt_0.9": 0.7248908296943232,
"calib/gap": 0.02142678132678133,
"calib/mean_conf": 0.8974628820960697,
"calib/mu_c": 0.9147727272727273,
"calib/mu_w": 0.893345945945946,
"calib/nonempty_final_conf_rate": 0.89453125,
"calib/nonempty_reasoning_rate": 0.96484375,
"calib/nonempty_step_conf_rate": 0.9375,
"calib/pce": 0.7092532751091704,
"calib/std_conf": 0.1666275680637964,
"calib/step_conf_rate": 0.9375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2036.0,
"completions/max_terminated_length": 2036.0,
"completions/mean_length": 305.6015625,
"completions/mean_terminated_length": 305.6015625,
"completions/min_length": 15.0,
"completions/min_terminated_length": 15.0,
"epoch": 0.0256,
"grad_norm": 1.4956940412521362,
"learning_rate": 4.888888888888889e-06,
"loss": 0.0533,
"num_tokens": 5998909.0,
"reward": 0.6711543798446655,
"reward_std": 0.42066627740859985,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.2705550789833069,
"rewards/format_reward_step": 0.84765625,
"step": 24
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.9371144266709948e-08,
"aux_brier/mean_group_std": 0.18362094956793468,
"aux_brier/mean_r": 0.46135988830777225,
"aux_brier/n_active_tok": 157.5,
"aux_brier/n_groups": 9.125,
"aux_brier/n_step_records": 39.375,
"calib/answer_extract_rate": 0.91796875,
"calib/auroc": 0.5193012116089039,
"calib/avg_num_step_conf": 5.0078125,
"calib/ece": 0.6337889367816092,
"calib/final_conf_rate": 0.90625,
"calib/format_rate": 0.8515625,
"calib/frac_conf_gt_0.9": 0.7543103448275862,
"calib/gap": 0.05237352305813836,
"calib/mean_conf": 0.9053406609195404,
"calib/mu_c": 0.9434920634920634,
"calib/mu_w": 0.891118540433925,
"calib/nonempty_final_conf_rate": 0.90625,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.94140625,
"calib/pce": 0.6337889367816092,
"calib/std_conf": 0.17423891821746895,
"calib/step_conf_rate": 0.94140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1640.0,
"completions/max_terminated_length": 1640.0,
"completions/mean_length": 296.6328125,
"completions/mean_terminated_length": 297.79608154296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.8203977942466736,
"learning_rate": 4.861111111111111e-06,
"loss": 0.061,
"num_tokens": 6178071.0,
"reward": 0.7586723566055298,
"reward_std": 0.47993963956832886,
"rewards/accuracy_reward_step": 0.25,
"rewards/final_brier_reward_step": 0.33156442642211914,
"rewards/format_reward_step": 0.8515625,
"step": 25
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.1095086800694265e-08,
"aux_brier/mean_group_std": 0.18773403585079074,
"aux_brier/mean_r": 0.48856257711885304,
"aux_brier/n_active_tok": 166.5,
"aux_brier/n_groups": 8.71875,
"aux_brier/n_step_records": 41.625,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.4795587280986372,
"calib/avg_num_step_conf": 5.3046875,
"calib/ece": 0.7157371120107964,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.92578125,
"calib/frac_conf_gt_0.9": 0.7408906882591093,
"calib/gap": 0.020350234335568484,
"calib/mean_conf": 0.9019179487179487,
"calib/mu_c": 0.9184782608695651,
"calib/mu_w": 0.8981280265339966,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.71571012145749,
"calib/std_conf": 0.17176024475891646,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2688.0,
"completions/max_terminated_length": 2688.0,
"completions/mean_length": 310.42578125,
"completions/mean_terminated_length": 310.42578125,
"completions/min_length": 74.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.03431373089551926,
"learning_rate": 4.833333333333333e-06,
"loss": 0.1124,
"num_tokens": 6362780.0,
"reward": 0.7162113189697266,
"reward_std": 0.38292473554611206,
"rewards/accuracy_reward_step": 0.1796875,
"rewards/final_brier_reward_step": 0.29453274607658386,
"rewards/format_reward_step": 0.92578125,
"step": 26
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.646272480478663e-08,
"aux_brier/mean_group_std": 0.18948043789191105,
"aux_brier/mean_r": 0.47236006589088697,
"aux_brier/n_active_tok": 163.875,
"aux_brier/n_groups": 9.09375,
"aux_brier/n_step_records": 40.96875,
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.4680213903743315,
"calib/avg_num_step_conf": 5.17578125,
"calib/ece": 0.705890576652602,
"calib/final_conf_rate": 0.92578125,
"calib/format_rate": 0.88671875,
"calib/frac_conf_gt_0.9": 0.7552742616033755,
"calib/gap": -0.015956745098039127,
"calib/mean_conf": 0.8970630098452883,
"calib/mu_c": 0.8844726666666667,
"calib/mu_w": 0.9004294117647058,
"calib/nonempty_final_conf_rate": 0.92578125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.96875,
"calib/pce": 0.6959915611814347,
"calib/std_conf": 0.19202561087719486,
"calib/step_conf_rate": 0.96875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1533.0,
"completions/max_terminated_length": 1533.0,
"completions/mean_length": 285.203125,
"completions/mean_terminated_length": 285.203125,
"completions/min_length": 83.0,
"completions/min_terminated_length": 83.0,
"epoch": 0.0288,
"grad_norm": 1.375303030014038,
"learning_rate": 4.805555555555556e-06,
"loss": 0.0697,
"num_tokens": 6541008.0,
"reward": 0.715735673904419,
"reward_std": 0.42697474360466003,
"rewards/accuracy_reward_step": 0.19921875,
"rewards/final_brier_reward_step": 0.29263025522232056,
"rewards/format_reward_step": 0.88671875,
"step": 27
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -9.751273354757783e-09,
"aux_brier/mean_group_std": 0.20084937529796282,
"aux_brier/mean_r": 0.46332548943538576,
"aux_brier/n_active_tok": 171.0,
"aux_brier/n_groups": 9.8125,
"aux_brier/n_step_records": 42.75,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5777543061423466,
"calib/avg_num_step_conf": 5.41015625,
"calib/ece": 0.6407212851405623,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.93359375,
"calib/frac_conf_gt_0.9": 0.7349397590361446,
"calib/gap": 0.031039356516087135,
"calib/mean_conf": 0.9062666666666667,
"calib/mu_c": 0.9288294117647059,
"calib/mu_w": 0.8977900552486188,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.6369477911646586,
"calib/std_conf": 0.156506920864373,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 903.0,
"completions/max_terminated_length": 903.0,
"completions/mean_length": 284.25,
"completions/mean_terminated_length": 285.3647155761719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.5083498358726501,
"learning_rate": 4.777777777777778e-06,
"loss": 0.021,
"num_tokens": 6720720.0,
"reward": 0.823421835899353,
"reward_std": 0.4264180362224579,
"rewards/accuracy_reward_step": 0.265625,
"rewards/final_brier_reward_step": 0.36399996280670166,
"rewards/format_reward_step": 0.93359375,
"step": 28
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.350772413534097e-09,
"aux_brier/mean_group_std": 0.19330138283500833,
"aux_brier/mean_r": 0.4670754356864333,
"aux_brier/n_active_tok": 175.25,
"aux_brier/n_groups": 9.3125,
"aux_brier/n_step_records": 43.8125,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5134239130434782,
"calib/avg_num_step_conf": 5.55859375,
"calib/ece": 0.7283373983739839,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.7845528455284553,
"calib/gap": 0.029248695652173917,
"calib/mean_conf": 0.915329268292683,
"calib/mu_c": 0.9391086956521739,
"calib/mu_w": 0.90986,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.7283373983739839,
"calib/std_conf": 0.14145036087094834,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1416.0,
"completions/max_terminated_length": 1416.0,
"completions/mean_length": 291.09375,
"completions/mean_terminated_length": 291.09375,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.9130586385726929,
"learning_rate": 4.75e-06,
"loss": -0.0081,
"num_tokens": 6902368.0,
"reward": 0.7286772131919861,
"reward_std": 0.37026363611221313,
"rewards/accuracy_reward_step": 0.18359375,
"rewards/final_brier_reward_step": 0.2897088825702667,
"rewards/format_reward_step": 0.9453125,
"step": 29
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.160996042359752e-09,
"aux_brier/mean_group_std": 0.19545460216574173,
"aux_brier/mean_r": 0.4584137314399851,
"aux_brier/n_active_tok": 183.375,
"aux_brier/n_groups": 10.84375,
"aux_brier/n_step_records": 45.84375,
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.5740104166666666,
"calib/avg_num_step_conf": 5.86328125,
"calib/ece": 0.7096774193548387,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": 0.03160000000000007,
"calib/mean_conf": 0.902016129032258,
"calib/mu_c": 0.9275000000000001,
"calib/mu_w": 0.8959,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.7090725806451613,
"calib/std_conf": 0.1675775210053721,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1568.0,
"completions/max_terminated_length": 1568.0,
"completions/mean_length": 298.3984375,
"completions/mean_terminated_length": 299.5686340332031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.032,
"grad_norm": 0.11302852630615234,
"learning_rate": 4.722222222222222e-06,
"loss": 0.03,
"num_tokens": 7085742.0,
"reward": 0.7374452948570251,
"reward_std": 0.3994256556034088,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.30915623903274536,
"rewards/format_reward_step": 0.9453125,
"step": 30
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.083522158251142e-10,
"aux_brier/mean_group_std": 0.20664738762094437,
"aux_brier/mean_r": 0.5002378306483395,
"aux_brier/n_active_tok": 185.625,
"aux_brier/n_groups": 10.34375,
"aux_brier/n_step_records": 46.40625,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5280216802168021,
"calib/avg_num_step_conf": 6.08984375,
"calib/ece": 0.710364,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.7,
"calib/gap": 0.036683468834688426,
"calib/mean_conf": 0.890364,
"calib/mu_c": 0.9204444444444445,
"calib/mu_w": 0.8837609756097561,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.710364,
"calib/std_conf": 0.17630550616472535,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1032.0,
"completions/max_terminated_length": 1032.0,
"completions/mean_length": 287.5234375,
"completions/mean_terminated_length": 288.6510009765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.03306666666666667,
"grad_norm": 1.7296192646026611,
"learning_rate": 4.694444444444445e-06,
"loss": 0.0086,
"num_tokens": 7265260.0,
"reward": 0.7285614013671875,
"reward_std": 0.336410254240036,
"rewards/accuracy_reward_step": 0.17578125,
"rewards/final_brier_reward_step": 0.31268310546875,
"rewards/format_reward_step": 0.94921875,
"step": 31
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.435025147466522e-08,
"aux_brier/mean_group_std": 0.2067278140733199,
"aux_brier/mean_r": 0.5350343102531423,
"aux_brier/n_active_tok": 174.625,
"aux_brier/n_groups": 8.6875,
"aux_brier/n_step_records": 43.65625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5011188686000716,
"calib/avg_num_step_conf": 5.5234375,
"calib/ece": 0.6456521739130435,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.6205533596837944,
"calib/gap": 0.022327246688149005,
"calib/mean_conf": 0.8709486166007905,
"calib/mu_c": 0.8882456140350876,
"calib/mu_w": 0.8659183673469386,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.6456521739130435,
"calib/std_conf": 0.16714260238987344,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 789.0,
"completions/max_terminated_length": 789.0,
"completions/mean_length": 273.2890625,
"completions/mean_terminated_length": 273.2890625,
"completions/min_length": 74.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.33643317222595215,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0037,
"num_tokens": 7441926.0,
"reward": 0.8007103204727173,
"reward_std": 0.3542967438697815,
"rewards/accuracy_reward_step": 0.22265625,
"rewards/final_brier_reward_step": 0.3747164011001587,
"rewards/format_reward_step": 0.96875,
"step": 32
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -5.842371184061612e-09,
"aux_brier/mean_group_std": 0.2071667422683363,
"aux_brier/mean_r": 0.5277406418950331,
"aux_brier/n_active_tok": 188.5,
"aux_brier/n_groups": 10.53125,
"aux_brier/n_step_records": 47.125,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4771356783919598,
"calib/avg_num_step_conf": 5.92578125,
"calib/ece": 0.666940562248996,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.642570281124498,
"calib/gap": 0.02484522613065321,
"calib/mean_conf": 0.8677437751004016,
"calib/mu_c": 0.8876,
"calib/mu_w": 0.8627547738693467,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.666940562248996,
"calib/std_conf": 0.19149247358883403,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1823.0,
"completions/max_terminated_length": 1823.0,
"completions/mean_length": 312.23046875,
"completions/mean_terminated_length": 312.23046875,
"completions/min_length": 93.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.0352,
"grad_norm": 0.5383950471878052,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0317,
"num_tokens": 7628729.0,
"reward": 0.755446195602417,
"reward_std": 0.36358797550201416,
"rewards/accuracy_reward_step": 0.1953125,
"rewards/final_brier_reward_step": 0.3420972228050232,
"rewards/format_reward_step": 0.94921875,
"step": 33
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.2737829242770449e-08,
"aux_brier/mean_group_std": 0.2017562837895255,
"aux_brier/mean_r": 0.5390499524522482,
"aux_brier/n_active_tok": 181.25,
"aux_brier/n_groups": 9.15625,
"aux_brier/n_step_records": 45.3125,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4530153754291685,
"calib/avg_num_step_conf": 5.77734375,
"calib/ece": 0.5866932270916336,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.6175298804780877,
"calib/gap": 0.0031907747424987365,
"calib/mean_conf": 0.8680478087649403,
"calib/mu_c": 0.8702597402597402,
"calib/mu_w": 0.8670689655172414,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.57398406374502,
"calib/std_conf": 0.19062011852590993,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 846.0,
"completions/max_terminated_length": 846.0,
"completions/mean_length": 274.18359375,
"completions/mean_terminated_length": 275.25885009765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.40328362584114075,
"learning_rate": 4.611111111111112e-06,
"loss": -0.0135,
"num_tokens": 7804032.0,
"reward": 0.881942868232727,
"reward_std": 0.439346045255661,
"rewards/accuracy_reward_step": 0.30078125,
"rewards/final_brier_reward_step": 0.4183965027332306,
"rewards/format_reward_step": 0.953125,
"step": 34
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.857877070722608e-09,
"aux_brier/mean_group_std": 0.21221449500511383,
"aux_brier/mean_r": 0.5864900531868034,
"aux_brier/n_active_tok": 204.625,
"aux_brier/n_groups": 11.9375,
"aux_brier/n_step_records": 51.15625,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4663071613459879,
"calib/avg_num_step_conf": 6.4140625,
"calib/ece": 0.626573705179283,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.5338645418326693,
"calib/gap": -0.030287316652286345,
"calib/mean_conf": 0.8422709163346614,
"calib/mu_c": 0.8193442622950821,
"calib/mu_w": 0.8496315789473684,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.6129083665338646,
"calib/std_conf": 0.1989394748463672,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1683.0,
"completions/max_terminated_length": 1683.0,
"completions/mean_length": 317.76171875,
"completions/mean_terminated_length": 317.76171875,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.19969779253005981,
"learning_rate": 4.583333333333333e-06,
"loss": 0.057,
"num_tokens": 7994635.0,
"reward": 0.828087568283081,
"reward_std": 0.31002628803253174,
"rewards/accuracy_reward_step": 0.23828125,
"rewards/final_brier_reward_step": 0.39828789234161377,
"rewards/format_reward_step": 0.98046875,
"step": 35
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.909607559580362e-08,
"aux_brier/mean_group_std": 0.19722265412808157,
"aux_brier/mean_r": 0.6097747510974779,
"aux_brier/n_active_tok": 183.25,
"aux_brier/n_groups": 9.375,
"aux_brier/n_step_records": 45.8125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.46617479327060163,
"calib/avg_num_step_conf": 5.765625,
"calib/ece": 0.4968900398406375,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.4581673306772908,
"calib/gap": -0.003002594810379011,
"calib/mean_conf": 0.8128310756972111,
"calib/mu_c": 0.8108333333333335,
"calib/mu_w": 0.8138359281437125,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.48752988047808765,
"calib/std_conf": 0.20786458008957415,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1956.0,
"completions/max_terminated_length": 1956.0,
"completions/mean_length": 300.7421875,
"completions/mean_terminated_length": 300.7421875,
"completions/min_length": 73.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.0384,
"grad_norm": 0.035050421953201294,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0316,
"num_tokens": 8174337.0,
"reward": 0.9423432350158691,
"reward_std": 0.4082295000553131,
"rewards/accuracy_reward_step": 0.3359375,
"rewards/final_brier_reward_step": 0.48812299966812134,
"rewards/format_reward_step": 0.96875,
"step": 36
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.402787857043066e-08,
"aux_brier/mean_group_std": 0.21307766927910904,
"aux_brier/mean_r": 0.631122288371699,
"aux_brier/n_active_tok": 194.125,
"aux_brier/n_groups": 11.46875,
"aux_brier/n_step_records": 48.53125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.561368778280543,
"calib/avg_num_step_conf": 6.23046875,
"calib/ece": 0.5385600000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.412,
"calib/gap": 0.05445862960568848,
"calib/mean_conf": 0.7925599999999999,
"calib/mu_c": 0.8322058823529412,
"calib/mu_w": 0.7777472527472528,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.52956,
"calib/std_conf": 0.23220991882346456,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 898.0,
"completions/max_terminated_length": 898.0,
"completions/mean_length": 291.6953125,
"completions/mean_terminated_length": 292.8392333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.8316993713378906,
"learning_rate": 4.527777777777778e-06,
"loss": 0.0747,
"num_tokens": 8356107.0,
"reward": 0.8771013617515564,
"reward_std": 0.37841999530792236,
"rewards/accuracy_reward_step": 0.26953125,
"rewards/final_brier_reward_step": 0.4849679470062256,
"rewards/format_reward_step": 0.97265625,
"step": 37
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 9.015820735813751e-09,
"aux_brier/mean_group_std": 0.1990842058790093,
"aux_brier/mean_r": 0.6854511867977533,
"aux_brier/n_active_tok": 191.75,
"aux_brier/n_groups": 10.125,
"aux_brier/n_step_records": 47.9375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4861378977820636,
"calib/avg_num_step_conf": 6.0234375,
"calib/ece": 0.5195219123505976,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.3904382470119522,
"calib/gap": -0.0017807778849243672,
"calib/mean_conf": 0.7777689243027889,
"calib/mu_c": 0.7764705882352941,
"calib/mu_w": 0.7782513661202185,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.5131872509960159,
"calib/std_conf": 0.22659220261363155,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1089.0,
"completions/max_terminated_length": 1089.0,
"completions/mean_length": 287.65625,
"completions/mean_terminated_length": 288.7843322753906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.5050545334815979,
"learning_rate": 4.5e-06,
"loss": 0.0058,
"num_tokens": 8536635.0,
"reward": 0.8718788623809814,
"reward_std": 0.3719131648540497,
"rewards/accuracy_reward_step": 0.265625,
"rewards/final_brier_reward_step": 0.4797031581401825,
"rewards/format_reward_step": 0.97265625,
"step": 38
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.3672130289021922e-08,
"aux_brier/mean_group_std": 0.2077947444626746,
"aux_brier/mean_r": 0.7109347300063643,
"aux_brier/n_active_tok": 185.75,
"aux_brier/n_groups": 10.34375,
"aux_brier/n_step_records": 46.4375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4582189239332097,
"calib/avg_num_step_conf": 5.8359375,
"calib/ece": 0.4736904761904761,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.3253968253968254,
"calib/gap": -0.04231168831168819,
"calib/mean_conf": 0.7282142857142857,
"calib/mu_c": 0.6988311688311689,
"calib/mu_w": 0.7411428571428571,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4481746031746031,
"calib/std_conf": 0.25118999661430186,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1725.0,
"completions/max_terminated_length": 1725.0,
"completions/mean_length": 296.5859375,
"completions/mean_terminated_length": 296.5859375,
"completions/min_length": 38.0,
"completions/min_terminated_length": 38.0,
"epoch": 0.0416,
"grad_norm": 1.7626452445983887,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0308,
"num_tokens": 8718649.0,
"reward": 0.9181327819824219,
"reward_std": 0.3791179060935974,
"rewards/accuracy_reward_step": 0.30078125,
"rewards/final_brier_reward_step": 0.516281247138977,
"rewards/format_reward_step": 0.9765625,
"step": 39
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.385072284451841e-08,
"aux_brier/mean_group_std": 0.17809794199122972,
"aux_brier/mean_r": 0.7468761257165805,
"aux_brier/n_active_tok": 199.125,
"aux_brier/n_groups": 11.28125,
"aux_brier/n_step_records": 49.78125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.48548223350253805,
"calib/avg_num_step_conf": 6.25390625,
"calib/ece": 0.4875438596491228,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.2631578947368421,
"calib/gap": 0.01573231810490694,
"calib/mean_conf": 0.6832523616734143,
"calib/mu_c": 0.6958,
"calib/mu_w": 0.680067681895093,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4841835357624831,
"calib/std_conf": 0.27774150629818234,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2143.0,
"completions/max_terminated_length": 2143.0,
"completions/mean_length": 320.19140625,
"completions/mean_terminated_length": 320.19140625,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.11550385504961014,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0535,
"num_tokens": 8907378.0,
"reward": 0.811775803565979,
"reward_std": 0.37996870279312134,
"rewards/accuracy_reward_step": 0.203125,
"rewards/final_brier_reward_step": 0.5127280950546265,
"rewards/format_reward_step": 0.9609375,
"step": 40
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.036460420602349e-08,
"aux_brier/mean_group_std": 0.17564145897266573,
"aux_brier/mean_r": 0.775780914182563,
"aux_brier/n_active_tok": 189.875,
"aux_brier/n_groups": 11.1875,
"aux_brier/n_step_records": 47.46875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4394172763445001,
"calib/avg_num_step_conf": 6.03125,
"calib/ece": 0.32107509881422935,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.1857707509881423,
"calib/gap": -0.055001155179052574,
"calib/mean_conf": 0.6242213438735178,
"calib/mu_c": 0.5922641509433964,
"calib/mu_w": 0.6472653061224489,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2631620553359684,
"calib/std_conf": 0.27202908764445805,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 895.0,
"completions/max_terminated_length": 895.0,
"completions/mean_length": 283.78125,
"completions/mean_terminated_length": 284.8941345214844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 65.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.238211989402771,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0189,
"num_tokens": 9087274.0,
"reward": 1.0600254535675049,
"reward_std": 0.4129643440246582,
"rewards/accuracy_reward_step": 0.421875,
"rewards/final_brier_reward_step": 0.5994765758514404,
"rewards/format_reward_step": 0.9765625,
"step": 41
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.3230203970565624e-08,
"aux_brier/mean_group_std": 0.14368066246345312,
"aux_brier/mean_r": 0.850863281087842,
"aux_brier/n_active_tok": 176.75,
"aux_brier/n_groups": 9.1875,
"aux_brier/n_step_records": 44.1875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5253285870755751,
"calib/avg_num_step_conf": 5.58984375,
"calib/ece": 0.27551181102362204,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.10236220472440945,
"calib/gap": 0.029460569550931015,
"calib/mean_conf": 0.5637007874015747,
"calib/mu_c": 0.5829545454545454,
"calib/mu_w": 0.5534939759036144,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.24637795275590552,
"calib/std_conf": 0.26609986625001836,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 742.0,
"completions/max_terminated_length": 742.0,
"completions/mean_length": 262.30859375,
"completions/mean_terminated_length": 263.3372802734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.0448,
"grad_norm": 0.45979180932044983,
"learning_rate": 4.388888888888889e-06,
"loss": 0.0198,
"num_tokens": 9258793.0,
"reward": 1.0074474811553955,
"reward_std": 0.35529351234436035,
"rewards/accuracy_reward_step": 0.34765625,
"rewards/final_brier_reward_step": 0.6626027822494507,
"rewards/format_reward_step": 0.98828125,
"step": 42
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 8.329702780307535e-08,
"aux_brier/mean_group_std": 0.14957277166942062,
"aux_brier/mean_r": 0.8259949757507827,
"aux_brier/n_active_tok": 210.625,
"aux_brier/n_groups": 14.1875,
"aux_brier/n_step_records": 52.65625,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5152524544179524,
"calib/avg_num_step_conf": 6.6171875,
"calib/ece": 0.347479674796748,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.11788617886178862,
"calib/gap": 0.01926718092566615,
"calib/mean_conf": 0.5630081300813008,
"calib/mu_c": 0.5774193548387097,
"calib/mu_w": 0.5581521739130435,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.32922764227642276,
"calib/std_conf": 0.270925648935229,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2167.0,
"completions/max_terminated_length": 2167.0,
"completions/mean_length": 322.0625,
"completions/mean_terminated_length": 322.0625,
"completions/min_length": 48.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.09345488250255585,
"learning_rate": 4.361111111111112e-06,
"loss": 0.0706,
"num_tokens": 9446465.0,
"reward": 0.8823894262313843,
"reward_std": 0.2873356342315674,
"rewards/accuracy_reward_step": 0.24609375,
"rewards/final_brier_reward_step": 0.6233078241348267,
"rewards/format_reward_step": 0.9609375,
"step": 43
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.4479341359517903e-08,
"aux_brier/mean_group_std": 0.12620192815382447,
"aux_brier/mean_r": 0.8583310163183968,
"aux_brier/n_active_tok": 207.625,
"aux_brier/n_groups": 12.625,
"aux_brier/n_step_records": 51.90625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5389643463497452,
"calib/avg_num_step_conf": 6.578125,
"calib/ece": 0.2923015873015873,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.10317460317460317,
"calib/gap": 0.03947368421052644,
"calib/mean_conf": 0.5152380952380953,
"calib/mu_c": 0.545,
"calib/mu_w": 0.5055263157894736,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.28075396825396826,
"calib/std_conf": 0.28069277755964733,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2139.0,
"completions/max_terminated_length": 2139.0,
"completions/mean_length": 321.21484375,
"completions/mean_terminated_length": 322.4745178222656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.22092793881893158,
"learning_rate": 4.333333333333334e-06,
"loss": 0.056,
"num_tokens": 9635016.0,
"reward": 0.8987468481063843,
"reward_std": 0.2588950991630554,
"rewards/accuracy_reward_step": 0.2421875,
"rewards/final_brier_reward_step": 0.6653000116348267,
"rewards/format_reward_step": 0.98046875,
"step": 44
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.225510651436394e-08,
"aux_brier/mean_group_std": 0.1263788228170714,
"aux_brier/mean_r": 0.8728943111172088,
"aux_brier/n_active_tok": 179.75,
"aux_brier/n_groups": 10.1875,
"aux_brier/n_step_records": 44.9375,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5752585944630362,
"calib/avg_num_step_conf": 5.6484375,
"calib/ece": 0.17851807228915667,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.06827309236947791,
"calib/gap": 0.06491375114085796,
"calib/mean_conf": 0.41818875502008035,
"calib/mu_c": 0.46328947368421053,
"calib/mu_w": 0.3983757225433526,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.14574297188755023,
"calib/std_conf": 0.2666398389756717,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1953.0,
"completions/max_terminated_length": 1953.0,
"completions/mean_length": 303.140625,
"completions/mean_terminated_length": 305.5275573730469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 93.0,
"epoch": 0.048,
"grad_norm": 0.21973006427288055,
"learning_rate": 4.305555555555556e-06,
"loss": 0.0417,
"num_tokens": 9817668.0,
"reward": 0.9508191347122192,
"reward_std": 0.36955684423446655,
"rewards/accuracy_reward_step": 0.296875,
"rewards/final_brier_reward_step": 0.7017141580581665,
"rewards/format_reward_step": 0.95703125,
"step": 45
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.3159327044709457e-08,
"aux_brier/mean_group_std": 0.11285619288794033,
"aux_brier/mean_r": 0.8738174159879206,
"aux_brier/n_active_tok": 211.25,
"aux_brier/n_groups": 14.0625,
"aux_brier/n_step_records": 52.8125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5580414312617703,
"calib/avg_num_step_conf": 6.6796875,
"calib/ece": 0.1801984126984127,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.047619047619047616,
"calib/gap": 0.04394802259887015,
"calib/mean_conf": 0.4135317460317461,
"calib/mu_c": 0.4444000000000001,
"calib/mu_w": 0.4004519774011299,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.14805555555555555,
"calib/std_conf": 0.25272920404949456,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2560.0,
"completions/max_terminated_length": 2560.0,
"completions/mean_length": 321.11328125,
"completions/mean_terminated_length": 321.11328125,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.3134056329727173,
"learning_rate": 4.277777777777778e-06,
"loss": 0.019,
"num_tokens": 10004641.0,
"reward": 0.9625797271728516,
"reward_std": 0.31446170806884766,
"rewards/accuracy_reward_step": 0.29296875,
"rewards/final_brier_reward_step": 0.7175066471099854,
"rewards/format_reward_step": 0.98046875,
"step": 46
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -7.962741924161243e-08,
"aux_brier/mean_group_std": 0.08731022754734642,
"aux_brier/mean_r": 0.9216793358362947,
"aux_brier/n_active_tok": 197.25,
"aux_brier/n_groups": 11.5625,
"aux_brier/n_step_records": 49.3125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.49510985116938344,
"calib/avg_num_step_conf": 6.21875,
"calib/ece": 0.1970395256916996,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.015810276679841896,
"calib/gap": -0.012137278525868211,
"calib/mean_conf": 0.3270711462450593,
"calib/mu_c": 0.31891566265060234,
"calib/mu_w": 0.33105294117647055,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.09802371541501975,
"calib/std_conf": 0.23131016016407235,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2347.0,
"completions/max_terminated_length": 2347.0,
"completions/mean_length": 328.40234375,
"completions/mean_terminated_length": 328.40234375,
"completions/min_length": 64.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.2886498272418976,
"learning_rate": 4.25e-06,
"loss": 0.0581,
"num_tokens": 10194688.0,
"reward": 0.9934947490692139,
"reward_std": 0.3531001806259155,
"rewards/accuracy_reward_step": 0.32421875,
"rewards/final_brier_reward_step": 0.7083537578582764,
"rewards/format_reward_step": 0.984375,
"step": 47
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.939587028296444e-08,
"aux_brier/mean_group_std": 0.08426508432514862,
"aux_brier/mean_r": 0.9278154236719309,
"aux_brier/n_active_tok": 179.625,
"aux_brier/n_groups": 10.1875,
"aux_brier/n_step_records": 44.90625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.44981517197042753,
"calib/avg_num_step_conf": 5.65234375,
"calib/ece": 0.20378486055776893,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0199203187250996,
"calib/gap": -0.044140147862423595,
"calib/mean_conf": 0.29159362549800794,
"calib/mu_c": 0.2594117647058824,
"calib/mu_w": 0.303551912568306,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11223107569721116,
"calib/std_conf": 0.2137034709713028,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1087.0,
"completions/max_terminated_length": 1087.0,
"completions/mean_length": 291.2578125,
"completions/mean_terminated_length": 292.4000244140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.0512,
"grad_norm": 0.09665241092443466,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0011,
"num_tokens": 10372938.0,
"reward": 0.9341095685958862,
"reward_std": 0.29305028915405273,
"rewards/accuracy_reward_step": 0.265625,
"rewards/final_brier_reward_step": 0.7208132743835449,
"rewards/format_reward_step": 0.9765625,
"step": 48
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.8392815825940545e-08,
"aux_brier/mean_group_std": 0.07645873166200375,
"aux_brier/mean_r": 0.9382534881779221,
"aux_brier/n_active_tok": 194.875,
"aux_brier/n_groups": 10.625,
"aux_brier/n_step_records": 48.71875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5316455696202532,
"calib/avg_num_step_conf": 6.140625,
"calib/ece": 0.22219291338582675,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.007874015748031496,
"calib/gap": -0.006096255274261608,
"calib/mean_conf": 0.2705629921259842,
"calib/mu_c": 0.26677083333333335,
"calib/mu_w": 0.27286708860759495,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0574015748031496,
"calib/std_conf": 0.208627163322724,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2445.0,
"completions/max_terminated_length": 2445.0,
"completions/mean_length": 319.4765625,
"completions/mean_terminated_length": 319.4765625,
"completions/min_length": 103.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.2979802191257477,
"learning_rate": 4.194444444444445e-06,
"loss": 0.0278,
"num_tokens": 10559260.0,
"reward": 1.0435776710510254,
"reward_std": 0.31182634830474854,
"rewards/accuracy_reward_step": 0.375,
"rewards/final_brier_reward_step": 0.6977483630180359,
"rewards/format_reward_step": 0.98828125,
"step": 49
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.08828793158267e-07,
"aux_brier/mean_group_std": 0.06427214662614472,
"aux_brier/mean_r": 0.9369866366067142,
"aux_brier/n_active_tok": 206.0,
"aux_brier/n_groups": 13.59375,
"aux_brier/n_step_records": 51.5,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5766666666666667,
"calib/avg_num_step_conf": 6.53515625,
"calib/ece": 0.21261904761904762,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.011904761904761904,
"calib/gap": 0.03258039215686276,
"calib/mean_conf": 0.259920634920635,
"calib/mu_c": 0.2793137254901961,
"calib/mu_w": 0.24673333333333336,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03388888888888889,
"calib/std_conf": 0.2018967048157792,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2422.0,
"completions/max_terminated_length": 2422.0,
"completions/mean_length": 370.22265625,
"completions/mean_terminated_length": 370.22265625,
"completions/min_length": 109.0,
"completions/min_terminated_length": 109.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.18312303721904755,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0785,
"num_tokens": 10759397.0,
"reward": 1.0602662563323975,
"reward_std": 0.32147830724716187,
"rewards/accuracy_reward_step": 0.3984375,
"rewards/final_brier_reward_step": 0.694190263748169,
"rewards/format_reward_step": 0.9765625,
"step": 50
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.72618643329492e-07,
"aux_brier/mean_group_std": 0.07035482235431693,
"aux_brier/mean_r": 0.9472252913957228,
"aux_brier/n_active_tok": 206.125,
"aux_brier/n_groups": 12.1875,
"aux_brier/n_step_records": 51.53125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5368758434547909,
"calib/avg_num_step_conf": 6.66015625,
"calib/ece": 0.22499043824701193,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.01195219123505976,
"calib/gap": 0.019218421052631607,
"calib/mean_conf": 0.2114239043824701,
"calib/mu_c": 0.2233684210526316,
"calib/mu_w": 0.20415,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.028964143426294827,
"calib/std_conf": 0.17743085805107256,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2576.0,
"completions/max_terminated_length": 2576.0,
"completions/mean_length": 340.421875,
"completions/mean_terminated_length": 341.75689697265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.0544,
"grad_norm": 0.4266815185546875,
"learning_rate": 4.138888888888889e-06,
"loss": -0.0116,
"num_tokens": 10955841.0,
"reward": 1.0364437103271484,
"reward_std": 0.2506124675273895,
"rewards/accuracy_reward_step": 0.37109375,
"rewards/final_brier_reward_step": 0.7004624009132385,
"rewards/format_reward_step": 0.98046875,
"step": 51
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 7.899122092952515e-07,
"aux_brier/mean_group_std": 0.0794486182257967,
"aux_brier/mean_r": 0.9407123680653106,
"aux_brier/n_active_tok": 168.875,
"aux_brier/n_groups": 9.71875,
"aux_brier/n_step_records": 42.21875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5141870684243566,
"calib/avg_num_step_conf": 5.34765625,
"calib/ece": 0.36494071146245055,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.006596359070935365,
"calib/mean_conf": 0.1799604743083004,
"calib/mu_c": 0.18303703703703705,
"calib/mu_w": 0.1764406779661017,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.005652173913043483,
"calib/std_conf": 0.15306228039208883,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1005.0,
"completions/max_terminated_length": 1005.0,
"completions/mean_length": 300.8984375,
"completions/mean_terminated_length": 302.0784606933594,
"completions/min_length": 0.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.07729270309209824,
"learning_rate": 4.111111111111111e-06,
"loss": -0.0015,
"num_tokens": 11140823.0,
"reward": 1.1750967502593994,
"reward_std": 0.32514822483062744,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.5988246202468872,
"rewards/format_reward_step": 0.98828125,
"step": 52
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.953884373450456e-07,
"aux_brier/mean_group_std": 0.07555331336539535,
"aux_brier/mean_r": 0.9359634580074322,
"aux_brier/n_active_tok": 204.75,
"aux_brier/n_groups": 11.6875,
"aux_brier/n_step_records": 51.1875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5449483891147952,
"calib/avg_num_step_conf": 6.4765625,
"calib/ece": 0.2586929133858268,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.011811023622047244,
"calib/gap": 0.03334250860181423,
"calib/mean_conf": 0.22036220472440946,
"calib/mu_c": 0.23860869565217394,
"calib/mu_w": 0.2052661870503597,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.013149606299212599,
"calib/std_conf": 0.17698292409696154,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1609.0,
"completions/max_terminated_length": 1609.0,
"completions/mean_length": 369.98828125,
"completions/mean_terminated_length": 369.98828125,
"completions/min_length": 85.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.11660010367631912,
"learning_rate": 4.083333333333334e-06,
"loss": -0.0032,
"num_tokens": 11341364.0,
"reward": 1.1119604110717773,
"reward_std": 0.32015568017959595,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.6744041442871094,
"rewards/format_reward_step": 0.98828125,
"step": 53
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.662577220397978e-07,
"aux_brier/mean_group_std": 0.07236882672802149,
"aux_brier/mean_r": 0.945290777109079,
"aux_brier/n_active_tok": 178.375,
"aux_brier/n_groups": 10.0625,
"aux_brier/n_step_records": 44.59375,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.48472530179847256,
"calib/avg_num_step_conf": 5.64453125,
"calib/ece": 0.3773098039215686,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.00784313725490196,
"calib/gap": -0.021984294161123463,
"calib/mean_conf": 0.17727843137254903,
"calib/mu_c": 0.16667424242424242,
"calib/mu_w": 0.18865853658536588,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.018470588235294114,
"calib/std_conf": 0.1651200066424957,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1003.0,
"completions/max_terminated_length": 1003.0,
"completions/mean_length": 301.01171875,
"completions/mean_terminated_length": 302.1921691894531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.0576,
"grad_norm": 0.24732919037342072,
"learning_rate": 4.055555555555556e-06,
"loss": -0.0221,
"num_tokens": 11524655.0,
"reward": 1.1660503149032593,
"reward_std": 0.25720590353012085,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.5938885807991028,
"rewards/format_reward_step": 0.99609375,
"step": 54
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.1042314287945665e-06,
"aux_brier/mean_group_std": 0.05991244713124709,
"aux_brier/mean_r": 0.9538390167173045,
"aux_brier/n_active_tok": 188.125,
"aux_brier/n_groups": 10.71875,
"aux_brier/n_step_records": 47.03125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.518529810298103,
"calib/avg_num_step_conf": 5.890625,
"calib/ece": 0.228996062992126,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009935636856368557,
"calib/mean_conf": 0.1658070866141732,
"calib/mu_c": 0.17222222222222222,
"calib/mu_w": 0.16228658536585366,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.02023622047244095,
"calib/std_conf": 0.13575915953849596,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2063.0,
"completions/max_terminated_length": 2063.0,
"completions/mean_length": 329.7265625,
"completions/mean_terminated_length": 329.7265625,
"completions/min_length": 98.0,
"completions/min_terminated_length": 98.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.08426016569137573,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0102,
"num_tokens": 11716889.0,
"reward": 1.0277278423309326,
"reward_std": 0.26626989245414734,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.7124737501144409,
"rewards/format_reward_step": 0.98828125,
"step": 55
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.280250077067562e-07,
"aux_brier/mean_group_std": 0.06364407843383066,
"aux_brier/mean_r": 0.9420665660996802,
"aux_brier/n_active_tok": 207.375,
"aux_brier/n_groups": 12.75,
"aux_brier/n_step_records": 51.84375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5009453781512605,
"calib/avg_num_step_conf": 6.5703125,
"calib/ece": 0.23035809448818895,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.011811023622047244,
"calib/gap": -0.022974341736694687,
"calib/mean_conf": 0.1670434803149606,
"calib/mu_c": 0.15166695238095237,
"calib/mu_w": 0.17464129411764706,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.03334645669291338,
"calib/std_conf": 0.1553176301624802,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2393.0,
"completions/max_terminated_length": 2393.0,
"completions/mean_length": 362.73046875,
"completions/mean_terminated_length": 364.1529541015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.12983429431915283,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0236,
"num_tokens": 11916588.0,
"reward": 1.0022120475769043,
"reward_std": 0.2770439386367798,
"rewards/accuracy_reward_step": 0.328125,
"rewards/final_brier_reward_step": 0.7119733095169067,
"rewards/format_reward_step": 0.9921875,
"step": 56
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.376654273221316e-07,
"aux_brier/mean_group_std": 0.04942550541668625,
"aux_brier/mean_r": 0.9636226090382859,
"aux_brier/n_active_tok": 195.125,
"aux_brier/n_groups": 10.84375,
"aux_brier/n_step_records": 48.78125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.47398989898989896,
"calib/avg_num_step_conf": 6.24609375,
"calib/ece": 0.34597896825396823,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.02168409090909093,
"calib/mean_conf": 0.152275,
"calib/mu_c": 0.14091666666666666,
"calib/mu_w": 0.1626007575757576,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.011031746031746033,
"calib/std_conf": 0.11978836219044754,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2107.0,
"completions/max_terminated_length": 2107.0,
"completions/mean_length": 357.76953125,
"completions/mean_terminated_length": 359.1725769042969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 104.0,
"epoch": 0.0608,
"grad_norm": 0.5027966499328613,
"learning_rate": 3.972222222222223e-06,
"loss": -0.0133,
"num_tokens": 12114969.0,
"reward": 1.117539882659912,
"reward_std": 0.27990177273750305,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.6107839345932007,
"rewards/format_reward_step": 0.984375,
"step": 57
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.1905553668498676e-07,
"aux_brier/mean_group_std": 0.0489274559063608,
"aux_brier/mean_r": 0.9568433270706841,
"aux_brier/n_active_tok": 235.0,
"aux_brier/n_groups": 16.21875,
"aux_brier/n_step_records": 58.75,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.4930229345122962,
"calib/avg_num_step_conf": 7.41015625,
"calib/ece": 0.24109834677419353,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.004032258064516129,
"calib/gap": 0.00532516855484938,
"calib/mean_conf": 0.17632100806451612,
"calib/mu_c": 0.17962776595744676,
"calib/mu_w": 0.17430259740259738,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.019193548387096776,
"calib/std_conf": 0.14273137815163298,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2774.0,
"completions/max_terminated_length": 2774.0,
"completions/mean_length": 447.6953125,
"completions/mean_terminated_length": 449.4510192871094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.036757275462150574,
"learning_rate": 3.944444444444445e-06,
"loss": 0.0545,
"num_tokens": 12335899.0,
"reward": 1.0200591087341309,
"reward_std": 0.337308406829834,
"rewards/accuracy_reward_step": 0.3671875,
"rewards/final_brier_reward_step": 0.6817988753318787,
"rewards/format_reward_step": 0.96484375,
"step": 58
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 9.014187950795449e-07,
"aux_brier/mean_group_std": 0.05336881759843583,
"aux_brier/mean_r": 0.9565652547414072,
"aux_brier/n_active_tok": 210.5,
"aux_brier/n_groups": 13.15625,
"aux_brier/n_step_records": 52.625,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5045234454638124,
"calib/avg_num_step_conf": 6.58984375,
"calib/ece": 0.27920948616600794,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0021645642201834348,
"calib/mean_conf": 0.16624505928853756,
"calib/mu_c": 0.16747706422018346,
"calib/mu_w": 0.16531250000000003,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.007312252964426878,
"calib/std_conf": 0.12189678142362566,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2247.0,
"completions/max_terminated_length": 2247.0,
"completions/mean_length": 386.48828125,
"completions/mean_terminated_length": 386.48828125,
"completions/min_length": 113.0,
"completions/min_terminated_length": 113.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.09482187032699585,
"learning_rate": 3.916666666666667e-06,
"loss": 0.0794,
"num_tokens": 12541088.0,
"reward": 1.0827938318252563,
"reward_std": 0.28609517216682434,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6593005657196045,
"rewards/format_reward_step": 0.984375,
"step": 59
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -9.191440295541753e-07,
"aux_brier/mean_group_std": 0.04327769012012292,
"aux_brier/mean_r": 0.9651614148333408,
"aux_brier/n_active_tok": 193.875,
"aux_brier/n_groups": 10.90625,
"aux_brier/n_step_records": 48.46875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5230470490863945,
"calib/avg_num_step_conf": 6.09765625,
"calib/ece": 0.31693452380952375,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.007616190233653786,
"calib/mean_conf": 0.14084325396825398,
"calib/mu_c": 0.14504424778761063,
"calib/mu_w": 0.13742805755395684,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.004682539682539683,
"calib/std_conf": 0.10867851707462094,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1680.0,
"completions/max_terminated_length": 1680.0,
"completions/mean_length": 362.8046875,
"completions/mean_terminated_length": 364.22747802734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.064,
"grad_norm": 0.5038781762123108,
"learning_rate": 3.88888888888889e-06,
"loss": -0.0006,
"num_tokens": 12742822.0,
"reward": 1.0935592651367188,
"reward_std": 0.3151361644268036,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/final_brier_reward_step": 0.6398622989654541,
"rewards/format_reward_step": 0.984375,
"step": 60
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.7857904763829424e-06,
"aux_brier/mean_group_std": 0.04825335258349303,
"aux_brier/mean_r": 0.9574052712981143,
"aux_brier/n_active_tok": 185.625,
"aux_brier/n_groups": 10.96875,
"aux_brier/n_step_records": 46.40625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5453249007936508,
"calib/avg_num_step_conf": 5.83203125,
"calib/ece": 0.35142519685039364,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.018094866071428545,
"calib/mean_conf": 0.1449527559055118,
"calib/mu_c": 0.15407142857142858,
"calib/mu_w": 0.13597656250000004,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.00015748031496063005,
"calib/std_conf": 0.11532670868666751,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2264.0,
"completions/max_terminated_length": 2264.0,
"completions/mean_length": 330.015625,
"completions/mean_terminated_length": 330.015625,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.14747215807437897,
"learning_rate": 3.861111111111112e-06,
"loss": 0.0755,
"num_tokens": 12931370.0,
"reward": 1.142686367034912,
"reward_std": 0.27778032422065735,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.617620587348938,
"rewards/format_reward_step": 0.9921875,
"step": 61
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 8.01399150374138e-08,
"aux_brier/mean_group_std": 0.05118887125523231,
"aux_brier/mean_r": 0.9571227176503799,
"aux_brier/n_active_tok": 212.625,
"aux_brier/n_groups": 14.5,
"aux_brier/n_step_records": 53.15625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5131968758416376,
"calib/avg_num_step_conf": 6.7578125,
"calib/ece": 0.24676587301587302,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0002524912469701601,
"calib/mean_conf": 0.1464880952380952,
"calib/mu_c": 0.14632978723404252,
"calib/mu_w": 0.14658227848101268,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.010119047619047618,
"calib/std_conf": 0.11910074707370537,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2481.0,
"completions/max_terminated_length": 2481.0,
"completions/mean_length": 398.13671875,
"completions/mean_terminated_length": 399.69805908203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.05607409402728081,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0342,
"num_tokens": 13140373.0,
"reward": 1.028857707977295,
"reward_std": 0.275343656539917,
"rewards/accuracy_reward_step": 0.3671875,
"rewards/final_brier_reward_step": 0.6857432723045349,
"rewards/format_reward_step": 0.98046875,
"step": 62
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.936687851755714e-06,
"aux_brier/mean_group_std": 0.05187988928767217,
"aux_brier/mean_r": 0.9580297117337391,
"aux_brier/n_active_tok": 208.125,
"aux_brier/n_groups": 12.875,
"aux_brier/n_step_records": 52.03125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.44259305835010065,
"calib/avg_num_step_conf": 6.59375,
"calib/ece": 0.29705511811023616,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": -0.019607394366197184,
"calib/mean_conf": 0.15908661417322836,
"calib/mu_c": 0.14812500000000003,
"calib/mu_w": 0.16773239436619722,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.007598425196850392,
"calib/std_conf": 0.12720808940392123,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2419.0,
"completions/max_terminated_length": 2419.0,
"completions/mean_length": 395.234375,
"completions/mean_terminated_length": 395.234375,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.0672,
"grad_norm": 0.07673514634370804,
"learning_rate": 3.8055555555555556e-06,
"loss": 0.0305,
"num_tokens": 13350193.0,
"reward": 1.0943763256072998,
"reward_std": 0.30822843313217163,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.6431306004524231,
"rewards/format_reward_step": 0.9921875,
"step": 63
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.176654963052641e-07,
"aux_brier/mean_group_std": 0.043067219126586974,
"aux_brier/mean_r": 0.961962838982981,
"aux_brier/n_active_tok": 215.25,
"aux_brier/n_groups": 14.03125,
"aux_brier/n_step_records": 53.8125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4872828396450443,
"calib/avg_num_step_conf": 6.81640625,
"calib/ece": 0.36685770750988134,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.003952569169960474,
"calib/gap": 0.009485751781027424,
"calib/mean_conf": 0.13907114624505929,
"calib/mu_c": 0.14379527559055125,
"calib/mu_w": 0.13430952380952382,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.001976284584980237,
"calib/std_conf": 0.11225086105583791,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2997.0,
"completions/max_terminated_length": 2997.0,
"completions/mean_length": 396.078125,
"completions/mean_terminated_length": 397.63140869140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.04356149584054947,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0781,
"num_tokens": 13555365.0,
"reward": 1.1410574913024902,
"reward_std": 0.3345259726047516,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.6032926440238953,
"rewards/format_reward_step": 0.98828125,
"step": 64
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.283241036822204e-07,
"aux_brier/mean_group_std": 0.04073559344267951,
"aux_brier/mean_r": 0.9675460601660378,
"aux_brier/n_active_tok": 189.5,
"aux_brier/n_groups": 11.0,
"aux_brier/n_step_records": 47.375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5487706193588548,
"calib/avg_num_step_conf": 6.15625,
"calib/ece": 0.3487322834645669,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01530108932461871,
"calib/mean_conf": 0.1292204724409449,
"calib/mu_c": 0.13735294117647057,
"calib/mu_w": 0.12205185185185186,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.004724409448818898,
"calib/std_conf": 0.10920650909228467,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1079.0,
"completions/max_terminated_length": 1079.0,
"completions/mean_length": 328.2421875,
"completions/mean_terminated_length": 329.5294189453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.2852790951728821,
"learning_rate": 3.7500000000000005e-06,
"loss": -0.0273,
"num_tokens": 13744419.0,
"reward": 1.1175971031188965,
"reward_std": 0.28578680753707886,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.626638650894165,
"rewards/format_reward_step": 0.9921875,
"step": 65
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.791922440708138e-07,
"aux_brier/mean_group_std": 0.04096224626962034,
"aux_brier/mean_r": 0.9695239851345836,
"aux_brier/n_active_tok": 222.375,
"aux_brier/n_groups": 14.75,
"aux_brier/n_step_records": 55.59375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4939931350114416,
"calib/avg_num_step_conf": 7.10546875,
"calib/ece": 0.3323214285714286,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.007744088482074779,
"calib/mean_conf": 0.13283730158730161,
"calib/mu_c": 0.12859649122807018,
"calib/mu_w": 0.13634057971014496,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0063888888888888875,
"calib/std_conf": 0.11622700027411259,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2386.0,
"completions/max_terminated_length": 2386.0,
"completions/mean_length": 413.87109375,
"completions/mean_terminated_length": 415.494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.0704,
"grad_norm": 0.03689504787325859,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0632,
"num_tokens": 13956722.0,
"reward": 1.0903115272521973,
"reward_std": 0.27287259697914124,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.6190588474273682,
"rewards/format_reward_step": 0.98046875,
"step": 66
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.273451621449655e-07,
"aux_brier/mean_group_std": 0.05023670370776576,
"aux_brier/mean_r": 0.9571496179015527,
"aux_brier/n_active_tok": 205.0,
"aux_brier/n_groups": 12.8125,
"aux_brier/n_step_records": 51.25,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5066753601589666,
"calib/avg_num_step_conf": 6.640625,
"calib/ece": 0.4093700787401574,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.007669523099850972,
"calib/mean_conf": 0.12425196850393702,
"calib/mu_c": 0.12056818181818182,
"calib/mu_w": 0.1282377049180328,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.006968503937007873,
"calib/std_conf": 0.10168768524639875,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2397.0,
"completions/max_terminated_length": 2397.0,
"completions/mean_length": 392.375,
"completions/mean_terminated_length": 393.91375732421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 118.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.19599691033363342,
"learning_rate": 3.694444444444445e-06,
"loss": -0.0191,
"num_tokens": 14162178.0,
"reward": 1.1526442766189575,
"reward_std": 0.26171380281448364,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.5715146660804749,
"rewards/format_reward_step": 0.98828125,
"step": 67
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 5.572307031354029e-07,
"aux_brier/mean_group_std": 0.03814839863849587,
"aux_brier/mean_r": 0.9717990202583285,
"aux_brier/n_active_tok": 199.25,
"aux_brier/n_groups": 11.8125,
"aux_brier/n_step_records": 49.8125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.506359649122807,
"calib/avg_num_step_conf": 6.2734375,
"calib/ece": 0.33337244094488183,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.000802481203007549,
"calib/mean_conf": 0.1270212598425197,
"calib/mu_c": 0.12657894736842107,
"calib/mu_w": 0.12738142857142862,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.005787401574803149,
"calib/std_conf": 0.08653963283927257,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2688.0,
"completions/max_terminated_length": 2688.0,
"completions/mean_length": 380.609375,
"completions/mean_terminated_length": 380.609375,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.05051703378558159,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0599,
"num_tokens": 14363702.0,
"reward": 1.1004488468170166,
"reward_std": 0.2596089243888855,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.6361704468727112,
"rewards/format_reward_step": 0.9921875,
"step": 68
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.0029460362790417e-08,
"aux_brier/mean_group_std": 0.050548947233676395,
"aux_brier/mean_r": 0.9439660831761795,
"aux_brier/n_active_tok": 209.25,
"aux_brier/n_groups": 13.5,
"aux_brier/n_step_records": 52.3125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5109879032258065,
"calib/avg_num_step_conf": 6.54296875,
"calib/ece": 0.26138339920948606,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0048487903225806345,
"calib/mean_conf": 0.12209486166007906,
"calib/mu_c": 0.12516129032258064,
"calib/mu_w": 0.1203125,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.007944664031620552,
"calib/std_conf": 0.11601399880299575,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2431.0,
"completions/max_terminated_length": 2431.0,
"completions/mean_length": 432.25390625,
"completions/mean_terminated_length": 432.25390625,
"completions/min_length": 106.0,
"completions/min_terminated_length": 106.0,
"epoch": 0.0736,
"grad_norm": 0.007314007729291916,
"learning_rate": 3.638888888888889e-06,
"loss": 0.0849,
"num_tokens": 14578855.0,
"reward": 1.02939772605896,
"reward_std": 0.26667526364326477,
"rewards/accuracy_reward_step": 0.36328125,
"rewards/final_brier_reward_step": 0.6879035234451294,
"rewards/format_reward_step": 0.98828125,
"step": 69
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.035032152820527e-07,
"aux_brier/mean_group_std": 0.04170518192401591,
"aux_brier/mean_r": 0.9653501564889758,
"aux_brier/n_active_tok": 213.25,
"aux_brier/n_groups": 13.875,
"aux_brier/n_step_records": 53.3125,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.43133814929480563,
"calib/avg_num_step_conf": 6.80078125,
"calib/ece": 0.29827477016129034,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.027646224905400765,
"calib/mean_conf": 0.11261232661290324,
"calib/mu_c": 0.09555638947368421,
"calib/mu_w": 0.12320261437908497,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.013911290322580643,
"calib/std_conf": 0.09021506938097142,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2762.0,
"completions/max_terminated_length": 2762.0,
"completions/mean_length": 452.859375,
"completions/mean_terminated_length": 454.63531494140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 82.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.1959269791841507,
"learning_rate": 3.6111111111111115e-06,
"loss": 0.1052,
"num_tokens": 14801779.0,
"reward": 1.0146573781967163,
"reward_std": 0.21777372062206268,
"rewards/accuracy_reward_step": 0.37109375,
"rewards/final_brier_reward_step": 0.6445671319961548,
"rewards/format_reward_step": 0.96484375,
"step": 70
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.300020816610072e-06,
"aux_brier/mean_group_std": 0.0498628349326778,
"aux_brier/mean_r": 0.959245374749235,
"aux_brier/n_active_tok": 221.0,
"aux_brier/n_groups": 13.8125,
"aux_brier/n_step_records": 55.25,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5055690692612982,
"calib/avg_num_step_conf": 7.09375,
"calib/ece": 0.31400000000000006,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.008718922033030332,
"calib/mean_conf": 0.12244268774703558,
"calib/mu_c": 0.11741121495327105,
"calib/mu_w": 0.12613013698630138,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.00675889328063241,
"calib/std_conf": 0.09537495007944927,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2343.0,
"completions/max_terminated_length": 2343.0,
"completions/mean_length": 429.6484375,
"completions/mean_terminated_length": 431.3333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.05317368730902672,
"learning_rate": 3.5833333333333335e-06,
"loss": -0.0008,
"num_tokens": 15016177.0,
"reward": 1.071134328842163,
"reward_std": 0.30423712730407715,
"rewards/accuracy_reward_step": 0.41796875,
"rewards/final_brier_reward_step": 0.6439124345779419,
"rewards/format_reward_step": 0.984375,
"step": 71
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.04259339126645e-07,
"aux_brier/mean_group_std": 0.04461903981817749,
"aux_brier/mean_r": 0.9693644108698314,
"aux_brier/n_active_tok": 226.125,
"aux_brier/n_groups": 13.15625,
"aux_brier/n_step_records": 56.53125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4859782889635565,
"calib/avg_num_step_conf": 7.30078125,
"calib/ece": 0.32679365079365075,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.012595502713879533,
"calib/mean_conf": 0.10852380952380954,
"calib/mu_c": 0.10122641509433963,
"calib/mu_w": 0.11382191780821917,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0073412698412698395,
"calib/std_conf": 0.07964167911435692,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2722.0,
"completions/max_terminated_length": 2722.0,
"completions/mean_length": 430.29296875,
"completions/mean_terminated_length": 431.98040771484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.0768,
"grad_norm": 0.21096296608448029,
"learning_rate": 3.555555555555556e-06,
"loss": 0.0966,
"num_tokens": 15230740.0,
"reward": 1.0624244213104248,
"reward_std": 0.24476802349090576,
"rewards/accuracy_reward_step": 0.4140625,
"rewards/final_brier_reward_step": 0.6325101852416992,
"rewards/format_reward_step": 0.98046875,
"step": 72
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 9.271813797423478e-07,
"aux_brier/mean_group_std": 0.0449266087964411,
"aux_brier/mean_r": 0.9717982172081372,
"aux_brier/n_active_tok": 189.0,
"aux_brier/n_groups": 10.75,
"aux_brier/n_step_records": 47.25,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.48421515094458045,
"calib/avg_num_step_conf": 5.99609375,
"calib/ece": 0.46781102362204724,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.012478127157471944,
"calib/mean_conf": 0.09840944881889764,
"calib/mu_c": 0.09285815602836879,
"calib/mu_w": 0.10533628318584073,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.005551181102362204,
"calib/std_conf": 0.07283800971162871,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2835.0,
"completions/max_terminated_length": 2835.0,
"completions/mean_length": 405.515625,
"completions/mean_terminated_length": 405.515625,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.009398190304636955,
"learning_rate": 3.5277777777777784e-06,
"loss": 0.0035,
"num_tokens": 15441584.0,
"reward": 1.1711393594741821,
"reward_std": 0.2896115183830261,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.5204952955245972,
"rewards/format_reward_step": 0.98046875,
"step": 73
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.378562431459841e-06,
"aux_brier/mean_group_std": 0.02836727570329467,
"aux_brier/mean_r": 0.9733670951190804,
"aux_brier/n_active_tok": 227.0,
"aux_brier/n_groups": 15.6875,
"aux_brier/n_step_records": 56.75,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.514908256880734,
"calib/avg_num_step_conf": 7.25,
"calib/ece": 0.3472409638554217,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.002470511140235901,
"calib/mean_conf": 0.09051004016064257,
"calib/mu_c": 0.09189908256880733,
"calib/mu_w": 0.08942857142857143,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.05520186485193156,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2868.0,
"completions/max_terminated_length": 2868.0,
"completions/mean_length": 460.3671875,
"completions/mean_terminated_length": 460.3671875,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.056083135306835175,
"learning_rate": 3.5e-06,
"loss": 0.1295,
"num_tokens": 15663366.0,
"reward": 1.0636870861053467,
"reward_std": 0.2997918426990509,
"rewards/accuracy_reward_step": 0.42578125,
"rewards/final_brier_reward_step": 0.6141231060028076,
"rewards/format_reward_step": 0.96875,
"step": 74
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.4267376573506851e-06,
"aux_brier/mean_group_std": 0.06698737739295728,
"aux_brier/mean_r": 0.9491829469277482,
"aux_brier/n_active_tok": 209.875,
"aux_brier/n_groups": 11.375,
"aux_brier/n_step_records": 52.46875,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6204035576795434,
"calib/avg_num_step_conf": 6.8203125,
"calib/ece": 0.528235294117647,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02168956590999603,
"calib/mean_conf": 0.10705882352941176,
"calib/mu_c": 0.11496913580246913,
"calib/mu_w": 0.0932795698924731,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.06616213352319744,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1526.0,
"completions/max_terminated_length": 1526.0,
"completions/mean_length": 393.40625,
"completions/mean_terminated_length": 394.94903564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 140.0,
"epoch": 0.08,
"grad_norm": 0.058377038687467575,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.008,
"num_tokens": 15868830.0,
"reward": 1.2541123628616333,
"reward_std": 0.23057867586612701,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.4930119216442108,
"rewards/format_reward_step": 0.99609375,
"step": 75
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.131059898879961e-06,
"aux_brier/mean_group_std": 0.05453229834066376,
"aux_brier/mean_r": 0.9576792566254494,
"aux_brier/n_active_tok": 223.0,
"aux_brier/n_groups": 16.125,
"aux_brier/n_step_records": 55.75,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.48114722507708124,
"calib/avg_num_step_conf": 6.97265625,
"calib/ece": 0.4586852589641434,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.00398406374501992,
"calib/gap": -0.0032759506680370143,
"calib/mean_conf": 0.10282868525896416,
"calib/mu_c": 0.10136690647482015,
"calib/mu_w": 0.10464285714285716,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0038645418326693224,
"calib/std_conf": 0.0881254907808353,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2949.0,
"completions/max_terminated_length": 2949.0,
"completions/mean_length": 465.45703125,
"completions/mean_terminated_length": 465.45703125,
"completions/min_length": 144.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.04971994459629059,
"learning_rate": 3.444444444444445e-06,
"loss": 0.0649,
"num_tokens": 16091043.0,
"reward": 1.162674903869629,
"reward_std": 0.29774484038352966,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.5256999731063843,
"rewards/format_reward_step": 0.9765625,
"step": 76
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.984068840806598e-06,
"aux_brier/mean_group_std": 0.05525603786164576,
"aux_brier/mean_r": 0.9603699655860797,
"aux_brier/n_active_tok": 195.125,
"aux_brier/n_groups": 10.9375,
"aux_brier/n_step_records": 48.78125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5084291187739464,
"calib/avg_num_step_conf": 6.296875,
"calib/ece": 0.44645019920318724,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.003996871008939959,
"calib/mean_conf": 0.09538247011952192,
"calib/mu_c": 0.09722962962962962,
"calib/mu_w": 0.09323275862068967,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.00199203187250996,
"calib/std_conf": 0.05840473166406211,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2911.0,
"completions/max_terminated_length": 2911.0,
"completions/mean_length": 378.9765625,
"completions/mean_terminated_length": 381.96063232421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.03173414245247841,
"learning_rate": 3.416666666666667e-06,
"loss": 0.0193,
"num_tokens": 16292725.0,
"reward": 1.1478147506713867,
"reward_std": 0.3370266854763031,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.5365713834762573,
"rewards/format_reward_step": 0.97265625,
"step": 77
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.471056108794173e-07,
"aux_brier/mean_group_std": 0.06261163814577028,
"aux_brier/mean_r": 0.9559191122970769,
"aux_brier/n_active_tok": 216.125,
"aux_brier/n_groups": 13.96875,
"aux_brier/n_step_records": 54.03125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5613479262672811,
"calib/avg_num_step_conf": 6.86328125,
"calib/ece": 0.3994336,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.004440706605222736,
"calib/mean_conf": 0.1083264,
"calib/mu_c": 0.11056451612903227,
"calib/mu_w": 0.10612380952380954,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.00588,
"calib/std_conf": 0.08427593549193031,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2662.0,
"completions/max_terminated_length": 2662.0,
"completions/mean_length": 484.21484375,
"completions/mean_terminated_length": 486.1137390136719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.0832,
"grad_norm": 0.026359865441918373,
"learning_rate": 3.3888888888888893e-06,
"loss": 0.0537,
"num_tokens": 16524708.0,
"reward": 1.1097559928894043,
"reward_std": 0.28809860348701477,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.5718364715576172,
"rewards/format_reward_step": 0.96484375,
"step": 78
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.4943519655605826e-07,
"aux_brier/mean_group_std": 0.05061152028513624,
"aux_brier/mean_r": 0.9596157720363296,
"aux_brier/n_active_tok": 227.125,
"aux_brier/n_groups": 14.375,
"aux_brier/n_step_records": 56.78125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5238305941845764,
"calib/avg_num_step_conf": 7.18359375,
"calib/ece": 0.441494071146245,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.003952569169960474,
"calib/gap": 0.010855878634639687,
"calib/mean_conf": 0.11186561264822134,
"calib/mu_c": 0.11671428571428571,
"calib/mu_w": 0.10585840707964603,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.09014137843436602,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1355.0,
"completions/max_terminated_length": 1355.0,
"completions/mean_length": 460.18359375,
"completions/mean_terminated_length": 461.9882507324219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.10766593366861343,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.024,
"num_tokens": 16748891.0,
"reward": 1.178181767463684,
"reward_std": 0.2775796353816986,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.5486649870872498,
"rewards/format_reward_step": 0.98828125,
"step": 79
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 8.176186380159933e-07,
"aux_brier/mean_group_std": 0.05718390000940116,
"aux_brier/mean_r": 0.9511804906700442,
"aux_brier/n_active_tok": 234.75,
"aux_brier/n_groups": 13.9375,
"aux_brier/n_step_records": 58.6875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5629880937139932,
"calib/avg_num_step_conf": 7.5703125,
"calib/ece": 0.44301185770750984,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.003952569169960474,
"calib/gap": 0.02323620535142744,
"calib/mean_conf": 0.13406324110671938,
"calib/mu_c": 0.1438904109589041,
"calib/mu_w": 0.12065420560747665,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.09186467509443817,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2329.0,
"completions/max_terminated_length": 2329.0,
"completions/mean_length": 410.14453125,
"completions/mean_terminated_length": 411.7529602050781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.08334469795227051,
"learning_rate": 3.3333333333333333e-06,
"loss": -0.0114,
"num_tokens": 16956048.0,
"reward": 1.2005274295806885,
"reward_std": 0.29282620549201965,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.552109956741333,
"rewards/format_reward_step": 0.984375,
"step": 80
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.260690728008498e-07,
"aux_brier/mean_group_std": 0.053407029620857686,
"aux_brier/mean_r": 0.9566165149535798,
"aux_brier/n_active_tok": 238.875,
"aux_brier/n_groups": 15.03125,
"aux_brier/n_step_records": 59.71875,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5560224089635853,
"calib/avg_num_step_conf": 7.76171875,
"calib/ece": 0.4004979591836735,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0008958916900093095,
"calib/mean_conf": 0.1310938775510204,
"calib/mu_c": 0.1306587301587302,
"calib/mu_w": 0.1315546218487395,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.008653061224489797,
"calib/std_conf": 0.08097116747655067,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2931.0,
"completions/max_terminated_length": 2931.0,
"completions/mean_length": 455.3203125,
"completions/mean_terminated_length": 462.5476379394531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.0864,
"grad_norm": 0.14797793328762054,
"learning_rate": 3.3055555555555558e-06,
"loss": 0.0465,
"num_tokens": 17178858.0,
"reward": 1.1143836975097656,
"reward_std": 0.28239357471466064,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5669094324111938,
"rewards/format_reward_step": 0.953125,
"step": 81
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -5.401173662766823e-07,
"aux_brier/mean_group_std": 0.0419831660674612,
"aux_brier/mean_r": 0.9644077503974919,
"aux_brier/n_active_tok": 198.625,
"aux_brier/n_groups": 11.3125,
"aux_brier/n_step_records": 49.65625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5053940568475452,
"calib/avg_num_step_conf": 6.2421875,
"calib/ece": 0.404433734939759,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005236821705426345,
"calib/mean_conf": 0.12142971887550202,
"calib/mu_c": 0.12395348837209302,
"calib/mu_w": 0.11871666666666668,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.003895582329317269,
"calib/std_conf": 0.07295184691420815,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2693.0,
"completions/max_terminated_length": 2693.0,
"completions/mean_length": 410.90234375,
"completions/mean_terminated_length": 412.5137634277344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.022786911576986313,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0576,
"num_tokens": 17389601.0,
"reward": 1.1237928867340088,
"reward_std": 0.2836204171180725,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.5654839277267456,
"rewards/format_reward_step": 0.95703125,
"step": 82
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.690216071466912e-06,
"aux_brier/mean_group_std": 0.056603451192411613,
"aux_brier/mean_r": 0.9445548420950379,
"aux_brier/n_active_tok": 248.375,
"aux_brier/n_groups": 15.40625,
"aux_brier/n_step_records": 62.09375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4390629946185502,
"calib/avg_num_step_conf": 8.0,
"calib/ece": 0.3851011904761904,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.02184586894586893,
"calib/mean_conf": 0.16394642857142858,
"calib/mu_c": 0.15380370370370372,
"calib/mu_w": 0.17564957264957265,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.006666666666666666,
"calib/std_conf": 0.09272470619009018,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2530.0,
"completions/max_terminated_length": 2530.0,
"completions/mean_length": 511.10546875,
"completions/mean_terminated_length": 515.1299438476562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.06263367086648941,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0592,
"num_tokens": 17627708.0,
"reward": 1.1604933738708496,
"reward_std": 0.218886137008667,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.5794734358787537,
"rewards/format_reward_step": 0.9765625,
"step": 83
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.366732909270702e-07,
"aux_brier/mean_group_std": 0.06194382538811641,
"aux_brier/mean_r": 0.9414539129239293,
"aux_brier/n_active_tok": 228.25,
"aux_brier/n_groups": 14.0,
"aux_brier/n_step_records": 57.0625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5344127062299926,
"calib/avg_num_step_conf": 7.13671875,
"calib/ece": 0.3483137254901961,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010968973159320355,
"calib/mean_conf": 0.18321568627450982,
"calib/mu_c": 0.18854961832061065,
"calib/mu_w": 0.1775806451612903,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.008901960784313726,
"calib/std_conf": 0.1281721137156322,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3005.0,
"completions/max_terminated_length": 3005.0,
"completions/mean_length": 431.24609375,
"completions/mean_terminated_length": 431.24609375,
"completions/min_length": 122.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.0896,
"grad_norm": 0.06950856745243073,
"learning_rate": 3.2222222222222227e-06,
"loss": -0.0013,
"num_tokens": 17844027.0,
"reward": 1.1666513681411743,
"reward_std": 0.23589617013931274,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.6275429725646973,
"rewards/format_reward_step": 0.99609375,
"step": 84
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -7.996977371416136e-07,
"aux_brier/mean_group_std": 0.057099130091725056,
"aux_brier/mean_r": 0.9383193974535409,
"aux_brier/n_active_tok": 234.5,
"aux_brier/n_groups": 14.6875,
"aux_brier/n_step_records": 58.625,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4952307790794443,
"calib/avg_num_step_conf": 7.328125,
"calib/ece": 0.34376,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.016674988797132084,
"calib/mean_conf": 0.20096,
"calib/mu_c": 0.19275590551181102,
"calib/mu_w": 0.2094308943089431,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.01836,
"calib/std_conf": 0.12515222091517192,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2906.0,
"completions/max_terminated_length": 2906.0,
"completions/mean_length": 496.01171875,
"completions/mean_terminated_length": 497.9568786621094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.4499802589416504,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.0335,
"num_tokens": 18078830.0,
"reward": 1.1386210918426514,
"reward_std": 0.23980866372585297,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.6169843673706055,
"rewards/format_reward_step": 0.9765625,
"step": 85
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.5307811555297377e-06,
"aux_brier/mean_group_std": 0.06543589677221735,
"aux_brier/mean_r": 0.9326995076108595,
"aux_brier/n_active_tok": 233.5,
"aux_brier/n_groups": 15.21875,
"aux_brier/n_step_records": 58.375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.593401722391084,
"calib/avg_num_step_conf": 7.40234375,
"calib/ece": 0.25393280632411064,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": 0.03494680851063825,
"calib/mean_conf": 0.21302371541501974,
"calib/mu_c": 0.23249999999999998,
"calib/mu_w": 0.19755319148936173,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.012134387351778657,
"calib/std_conf": 0.14985911509911753,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2390.0,
"completions/max_terminated_length": 2390.0,
"completions/mean_length": 490.1640625,
"completions/mean_terminated_length": 492.0863037109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.33087992668151855,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0163,
"num_tokens": 18309824.0,
"reward": 1.0977025032043457,
"reward_std": 0.2513139843940735,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.6798721551895142,
"rewards/format_reward_step": 0.98046875,
"step": 86
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.671883360922834e-06,
"aux_brier/mean_group_std": 0.06403182758078232,
"aux_brier/mean_r": 0.9349012693869212,
"aux_brier/n_active_tok": 189.875,
"aux_brier/n_groups": 10.15625,
"aux_brier/n_step_records": 47.46875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5668133166984044,
"calib/avg_num_step_conf": 5.93359375,
"calib/ece": 0.4348228346456692,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02279499638846938,
"calib/mean_conf": 0.1891929133858268,
"calib/mu_c": 0.19789808917197452,
"calib/mu_w": 0.17510309278350514,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.002952755905511811,
"calib/std_conf": 0.11869134767442834,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1478.0,
"completions/max_terminated_length": 1478.0,
"completions/mean_length": 389.29296875,
"completions/mean_terminated_length": 390.81964111328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.0928,
"grad_norm": 0.055273041129112244,
"learning_rate": 3.138888888888889e-06,
"loss": 0.024,
"num_tokens": 18514979.0,
"reward": 1.2524123191833496,
"reward_std": 0.23912067711353302,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.5721486806869507,
"rewards/format_reward_step": 0.9921875,
"step": 87
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.346658429213221e-07,
"aux_brier/mean_group_std": 0.05874770184713301,
"aux_brier/mean_r": 0.9336913212429232,
"aux_brier/n_active_tok": 225.125,
"aux_brier/n_groups": 13.3125,
"aux_brier/n_step_records": 56.28125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.6215100523098254,
"calib/avg_num_step_conf": 7.0625,
"calib/ece": 0.2942039682539682,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.007936507936507936,
"calib/gap": 0.04145853658536586,
"calib/mean_conf": 0.2364309523809524,
"calib/mu_c": 0.25666666666666665,
"calib/mu_w": 0.2152081300813008,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.009365079365079364,
"calib/std_conf": 0.1510091760926214,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2739.0,
"completions/max_terminated_length": 2739.0,
"completions/mean_length": 483.390625,
"completions/mean_terminated_length": 483.390625,
"completions/min_length": 195.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.3894507586956024,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.082,
"num_tokens": 18748575.0,
"reward": 1.1596109867095947,
"reward_std": 0.22322949767112732,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.6540689468383789,
"rewards/format_reward_step": 0.9765625,
"step": 88
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.5983053953116944e-06,
"aux_brier/mean_group_std": 0.05685648627614638,
"aux_brier/mean_r": 0.9327129965170959,
"aux_brier/n_active_tok": 226.125,
"aux_brier/n_groups": 12.96875,
"aux_brier/n_step_records": 56.53125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5610586011342154,
"calib/avg_num_step_conf": 7.1875,
"calib/ece": 0.23624505928853756,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.003952569169960474,
"calib/gap": 0.020043478260869607,
"calib/mean_conf": 0.24628458498023717,
"calib/mu_c": 0.2572173913043479,
"calib/mu_w": 0.2371739130434783,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.013992094861660078,
"calib/std_conf": 0.1400212788724654,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2247.0,
"completions/max_terminated_length": 2247.0,
"completions/mean_length": 493.38671875,
"completions/mean_terminated_length": 497.2716369628906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.4990207254886627,
"learning_rate": 3.0833333333333336e-06,
"loss": 0.0208,
"num_tokens": 18983770.0,
"reward": 1.1160681247711182,
"reward_std": 0.24893710017204285,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.6908347606658936,
"rewards/format_reward_step": 0.98828125,
"step": 89
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.048488792737006e-07,
"aux_brier/mean_group_std": 0.08312437496410667,
"aux_brier/mean_r": 0.9000635924668137,
"aux_brier/n_active_tok": 231.0,
"aux_brier/n_groups": 13.3125,
"aux_brier/n_step_records": 57.75,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.45340114431023515,
"calib/avg_num_step_conf": 7.23828125,
"calib/ece": 0.3158498023715415,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": -0.027391608391608313,
"calib/mean_conf": 0.2889723320158103,
"calib/mu_c": 0.2770629370629371,
"calib/mu_w": 0.3044545454545454,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.019802371541501967,
"calib/std_conf": 0.1606869247077436,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2975.0,
"completions/max_terminated_length": 2975.0,
"completions/mean_length": 466.8515625,
"completions/mean_terminated_length": 466.8515625,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.096,
"grad_norm": 0.2098333239555359,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0215,
"num_tokens": 19206604.0,
"reward": 1.2144343852996826,
"reward_std": 0.2846514880657196,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.631174623966217,
"rewards/format_reward_step": 0.98828125,
"step": 90
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.631682916759864e-07,
"aux_brier/mean_group_std": 0.05885228361090886,
"aux_brier/mean_r": 0.9083179479811493,
"aux_brier/n_active_tok": 241.5,
"aux_brier/n_groups": 14.15625,
"aux_brier/n_step_records": 60.375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5149176954732511,
"calib/avg_num_step_conf": 7.6328125,
"calib/ece": 0.29948412698412696,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.011904761904761904,
"calib/gap": -0.004629629629629595,
"calib/mean_conf": 0.3128174603174603,
"calib/mu_c": 0.31083333333333335,
"calib/mu_w": 0.31546296296296295,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.02043650793650794,
"calib/std_conf": 0.16148737730383342,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2512.0,
"completions/max_terminated_length": 2512.0,
"completions/mean_length": 488.08984375,
"completions/mean_terminated_length": 488.08984375,
"completions/min_length": 179.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.10228096693754196,
"learning_rate": 3.0277777777777776e-06,
"loss": 0.0142,
"num_tokens": 19439267.0,
"reward": 1.2170789241790771,
"reward_std": 0.24731676280498505,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.6495659947395325,
"rewards/format_reward_step": 0.984375,
"step": 91
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.660268711331514e-06,
"aux_brier/mean_group_std": 0.041815796396915884,
"aux_brier/mean_r": 0.9200492032263131,
"aux_brier/n_active_tok": 218.0,
"aux_brier/n_groups": 12.875,
"aux_brier/n_step_records": 54.5,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.6347257653061225,
"calib/avg_num_step_conf": 6.9375,
"calib/ece": 0.2732539682539682,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.011904761904761904,
"calib/gap": 0.06925000000000003,
"calib/mean_conf": 0.29365079365079366,
"calib/mu_c": 0.32442857142857146,
"calib/mu_w": 0.2551785714285714,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.005674603174603174,
"calib/std_conf": 0.17074813519910925,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2866.0,
"completions/max_terminated_length": 2866.0,
"completions/mean_length": 472.7734375,
"completions/mean_terminated_length": 474.6274719238281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.3245234787464142,
"learning_rate": 3e-06,
"loss": -0.0329,
"num_tokens": 19667017.0,
"reward": 1.204309344291687,
"reward_std": 0.27141866087913513,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.6766124963760376,
"rewards/format_reward_step": 0.9765625,
"step": 92
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 7.402533863665184e-07,
"aux_brier/mean_group_std": 0.0812354678954359,
"aux_brier/mean_r": 0.8682917187290476,
"aux_brier/n_active_tok": 267.0,
"aux_brier/n_groups": 15.28125,
"aux_brier/n_step_records": 66.75,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.36983497102544727,
"calib/avg_num_step_conf": 8.609375,
"calib/ece": 0.30675198412698407,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.05555555555555555,
"calib/gap": -0.11300396825396819,
"calib/mean_conf": 0.43023214285714284,
"calib/mu_c": 0.3737301587301588,
"calib/mu_w": 0.486734126984127,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11849206349206348,
"calib/std_conf": 0.22850982638072923,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2613.0,
"completions/max_terminated_length": 2613.0,
"completions/mean_length": 525.28125,
"completions/mean_terminated_length": 527.3411865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.0992,
"grad_norm": 0.1926182359457016,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.0354,
"num_tokens": 19907265.0,
"reward": 1.138358235359192,
"reward_std": 0.2905207872390747,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.6237451434135437,
"rewards/format_reward_step": 0.98046875,
"step": 93
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.345957592664766e-07,
"aux_brier/mean_group_std": 0.07341426312216502,
"aux_brier/mean_r": 0.8903702374658317,
"aux_brier/n_active_tok": 229.125,
"aux_brier/n_groups": 12.625,
"aux_brier/n_step_records": 57.28125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5557502189963708,
"calib/avg_num_step_conf": 7.28515625,
"calib/ece": 0.14703557312252963,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.015810276679841896,
"calib/gap": 0.04438555875359779,
"calib/mean_conf": 0.3852964426877471,
"calib/mu_c": 0.40827868852459015,
"calib/mu_w": 0.36389312977099236,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.02505928853754942,
"calib/std_conf": 0.18191887570871856,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2590.0,
"completions/max_terminated_length": 2590.0,
"completions/mean_length": 445.7734375,
"completions/mean_terminated_length": 449.2834777832031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.09607866406440735,
"learning_rate": 2.944444444444445e-06,
"loss": -0.0054,
"num_tokens": 20130063.0,
"reward": 1.1483310461044312,
"reward_std": 0.21412217617034912,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.718323826789856,
"rewards/format_reward_step": 0.984375,
"step": 94
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.395376028279216e-06,
"aux_brier/mean_group_std": 0.08585101812496332,
"aux_brier/mean_r": 0.859334020540006,
"aux_brier/n_active_tok": 256.5,
"aux_brier/n_groups": 14.03125,
"aux_brier/n_step_records": 64.125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5002564102564103,
"calib/avg_num_step_conf": 8.171875,
"calib/ece": 0.25031496062992126,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.05511811023622047,
"calib/gap": 0.0030397435897436265,
"calib/mean_conf": 0.4236220472440945,
"calib/mu_c": 0.42486666666666667,
"calib/mu_w": 0.42182692307692304,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04169291338582677,
"calib/std_conf": 0.22089891261085734,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1620.0,
"completions/max_terminated_length": 1620.0,
"completions/mean_length": 509.57421875,
"completions/mean_terminated_length": 513.5866088867188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.08091621100902557,
"learning_rate": 2.916666666666667e-06,
"loss": -0.0238,
"num_tokens": 20366642.0,
"reward": 1.2514491081237793,
"reward_std": 0.2802199125289917,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.6776718497276306,
"rewards/format_reward_step": 0.9921875,
"step": 95
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 5.531609392761538e-07,
"aux_brier/mean_group_std": 0.08987008361879134,
"aux_brier/mean_r": 0.8687645458862787,
"aux_brier/n_active_tok": 244.25,
"aux_brier/n_groups": 12.53125,
"aux_brier/n_step_records": 61.0625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5170045345425447,
"calib/avg_num_step_conf": 7.65625,
"calib/ece": 0.2505882352941176,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.01568627450980392,
"calib/gap": 0.017892104561216482,
"calib/mean_conf": 0.42698039215686273,
"calib/mu_c": 0.43343558282208594,
"calib/mu_w": 0.41554347826086946,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.01917647058823529,
"calib/std_conf": 0.19564849334678205,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1141.0,
"completions/max_terminated_length": 1141.0,
"completions/mean_length": 478.625,
"completions/mean_terminated_length": 478.625,
"completions/min_length": 197.0,
"completions/min_terminated_length": 197.0,
"epoch": 0.1024,
"grad_norm": 0.06811456382274628,
"learning_rate": 2.888888888888889e-06,
"loss": -0.0034,
"num_tokens": 20594986.0,
"reward": 1.3025823831558228,
"reward_std": 0.2817477583885193,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/final_brier_reward_step": 0.6868917942047119,
"rewards/format_reward_step": 0.98828125,
"step": 96
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.6849083896430805e-06,
"aux_brier/mean_group_std": 0.0853580985421658,
"aux_brier/mean_r": 0.8620290815106396,
"aux_brier/n_active_tok": 253.375,
"aux_brier/n_groups": 14.0,
"aux_brier/n_step_records": 63.34375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.42947276361819087,
"calib/avg_num_step_conf": 8.16796875,
"calib/ece": 0.2601574803149606,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.05905511811023622,
"calib/gap": -0.04818715642178917,
"calib/mean_conf": 0.4506299212598425,
"calib/mu_c": 0.4286231884057971,
"calib/mu_w": 0.4768103448275863,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.08374015748031496,
"calib/std_conf": 0.21283888413722996,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2624.0,
"completions/max_terminated_length": 2624.0,
"completions/mean_length": 484.8125,
"completions/mean_terminated_length": 484.8125,
"completions/min_length": 110.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.036569032818078995,
"learning_rate": 2.861111111111111e-06,
"loss": 0.0255,
"num_tokens": 20824170.0,
"reward": 1.196521520614624,
"reward_std": 0.31164079904556274,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.6610859632492065,
"rewards/format_reward_step": 0.984375,
"step": 97
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.166580990696886e-07,
"aux_brier/mean_group_std": 0.1180901782807219,
"aux_brier/mean_r": 0.8158591475667646,
"aux_brier/n_active_tok": 273.0,
"aux_brier/n_groups": 15.96875,
"aux_brier/n_step_records": 68.25,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5150348730096065,
"calib/avg_num_step_conf": 8.66015625,
"calib/ece": 0.2241633466135458,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.07569721115537849,
"calib/gap": 0.017927687853665053,
"calib/mean_conf": 0.5215737051792828,
"calib/mu_c": 0.5288590604026846,
"calib/mu_w": 0.5109313725490195,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.07605577689243029,
"calib/std_conf": 0.22639741691167684,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2598.0,
"completions/max_terminated_length": 2598.0,
"completions/mean_length": 514.52734375,
"completions/mean_terminated_length": 518.5787353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.0860593393445015,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0307,
"num_tokens": 21062073.0,
"reward": 1.243653655052185,
"reward_std": 0.335063099861145,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.6933647394180298,
"rewards/format_reward_step": 0.9765625,
"step": 98
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -9.718932159064053e-09,
"aux_brier/mean_group_std": 0.09900352003789668,
"aux_brier/mean_r": 0.8149418265467578,
"aux_brier/n_active_tok": 279.125,
"aux_brier/n_groups": 14.84375,
"aux_brier/n_step_records": 69.78125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.46830357142857143,
"calib/avg_num_step_conf": 8.7890625,
"calib/ece": 0.2847366533864541,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.09163346613545817,
"calib/gap": -0.01995292582417585,
"calib/mean_conf": 0.5323948207171315,
"calib/mu_c": 0.5196758241758241,
"calib/mu_w": 0.53962875,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.22729083665338642,
"calib/std_conf": 0.24678504189971875,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1831.0,
"completions/max_terminated_length": 1831.0,
"completions/mean_length": 570.921875,
"completions/mean_terminated_length": 575.4172973632812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.1056,
"grad_norm": 0.19342589378356934,
"learning_rate": 2.805555555555556e-06,
"loss": 0.0101,
"num_tokens": 21314029.0,
"reward": 1.004584789276123,
"reward_std": 0.2824621796607971,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.6511518955230713,
"rewards/format_reward_step": 0.97265625,
"step": 99
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -5.571975909002269e-07,
"aux_brier/mean_group_std": 0.10170921841231911,
"aux_brier/mean_r": 0.8233811362389352,
"aux_brier/n_active_tok": 266.375,
"aux_brier/n_groups": 13.28125,
"aux_brier/n_step_records": 66.59375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5978111319574735,
"calib/avg_num_step_conf": 8.5546875,
"calib/ece": 0.12350434782608696,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.05928853754940711,
"calib/gap": 0.0627040775484678,
"calib/mean_conf": 0.5267723320158103,
"calib/mu_c": 0.5572569230769231,
"calib/mu_w": 0.4945528455284553,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.06822134387351778,
"calib/std_conf": 0.22034138233223347,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1941.0,
"completions/max_terminated_length": 1941.0,
"completions/mean_length": 556.44921875,
"completions/mean_terminated_length": 560.8306884765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.12148700654506683,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0292,
"num_tokens": 21563888.0,
"reward": 1.178086280822754,
"reward_std": 0.2891013026237488,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.7201579809188843,
"rewards/format_reward_step": 0.98046875,
"step": 100
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.9718142124934843e-07,
"aux_brier/mean_group_std": 0.10433184295646486,
"aux_brier/mean_r": 0.8045303526163216,
"aux_brier/n_active_tok": 298.75,
"aux_brier/n_groups": 15.71875,
"aux_brier/n_step_records": 74.6875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.6049597423510467,
"calib/avg_num_step_conf": 9.80859375,
"calib/ece": 0.16257200000000005,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.14,
"calib/gap": 0.08663574879227043,
"calib/mean_conf": 0.570308,
"calib/mu_c": 0.617091304347826,
"calib/mu_w": 0.5304555555555556,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.13644,
"calib/std_conf": 0.2481456329174463,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2633.0,
"completions/max_terminated_length": 2633.0,
"completions/mean_length": 628.94140625,
"completions/mean_terminated_length": 631.4078979492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 193.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.19665689766407013,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0347,
"num_tokens": 21831889.0,
"reward": 1.1090095043182373,
"reward_std": 0.32522842288017273,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.701663076877594,
"rewards/format_reward_step": 0.96875,
"step": 101
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 7.312684778271183e-07,
"aux_brier/mean_group_std": 0.09773476679729748,
"aux_brier/mean_r": 0.8320598076183132,
"aux_brier/n_active_tok": 259.75,
"aux_brier/n_groups": 13.25,
"aux_brier/n_step_records": 64.9375,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4508320230607966,
"calib/avg_num_step_conf": 8.2265625,
"calib/ece": 0.22388235294117642,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.07058823529411765,
"calib/gap": -0.03742924528301894,
"calib/mean_conf": 0.5354117647058824,
"calib/mu_c": 0.521320754716981,
"calib/mu_w": 0.55875,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.06788235294117642,
"calib/std_conf": 0.20693971618831775,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1541.0,
"completions/max_terminated_length": 1541.0,
"completions/mean_length": 489.76953125,
"completions/mean_terminated_length": 491.6902160644531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.1088,
"grad_norm": 1.647596836090088,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.0106,
"num_tokens": 22063966.0,
"reward": 1.2793034315109253,
"reward_std": 0.24243158102035522,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.6797136664390564,
"rewards/format_reward_step": 0.9765625,
"step": 102
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.5484969873025278e-07,
"aux_brier/mean_group_std": 0.10507102402294292,
"aux_brier/mean_r": 0.7947326888876801,
"aux_brier/n_active_tok": 278.5,
"aux_brier/n_groups": 15.3125,
"aux_brier/n_step_records": 69.625,
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.45345744680851063,
"calib/avg_num_step_conf": 9.40625,
"calib/ece": 0.23202448979591836,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.10204081632653061,
"calib/gap": -0.0491362520458265,
"calib/mean_conf": 0.5712408163265307,
"calib/mu_c": 0.5503829787234042,
"calib/mu_w": 0.5995192307692307,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11387755102040815,
"calib/std_conf": 0.24214982115427608,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2801.0,
"completions/max_terminated_length": 2801.0,
"completions/mean_length": 648.47265625,
"completions/mean_terminated_length": 653.5787353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.21203476190567017,
"learning_rate": 2.6944444444444444e-06,
"loss": 0.0441,
"num_tokens": 22334527.0,
"reward": 1.18772554397583,
"reward_std": 0.30444949865341187,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.6415274143218994,
"rewards/format_reward_step": 0.953125,
"step": 103
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.8911112323902763e-07,
"aux_brier/mean_group_std": 0.11028402334238388,
"aux_brier/mean_r": 0.7920487339286056,
"aux_brier/n_active_tok": 295.5,
"aux_brier/n_groups": 14.96875,
"aux_brier/n_step_records": 73.875,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.6061949746160272,
"calib/avg_num_step_conf": 9.703125,
"calib/ece": 0.16158799999999998,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.116,
"calib/gap": 0.07995263800526964,
"calib/mean_conf": 0.616884,
"calib/mu_c": 0.6594188034188034,
"calib/mu_w": 0.5794661654135338,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.15523599999999996,
"calib/std_conf": 0.20940274722171148,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2012.0,
"completions/max_terminated_length": 2012.0,
"completions/mean_length": 574.453125,
"completions/mean_terminated_length": 581.2648315429688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 226.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.04694162309169769,
"learning_rate": 2.666666666666667e-06,
"loss": -0.0168,
"num_tokens": 22588267.0,
"reward": 1.1170382499694824,
"reward_std": 0.34029483795166016,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/final_brier_reward_step": 0.7025283575057983,
"rewards/format_reward_step": 0.96875,
"step": 104
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.495803599622537e-07,
"aux_brier/mean_group_std": 0.10375383807398324,
"aux_brier/mean_r": 0.7698952471212377,
"aux_brier/n_active_tok": 300.125,
"aux_brier/n_groups": 16.8125,
"aux_brier/n_step_records": 75.03125,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.45430000000000004,
"calib/avg_num_step_conf": 9.7734375,
"calib/ece": 0.25625306122448976,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.1673469387755102,
"calib/gap": -0.03667066666666652,
"calib/mean_conf": 0.6072571428571429,
"calib/mu_c": 0.589296,
"calib/mu_w": 0.6259666666666666,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1766530612244898,
"calib/std_conf": 0.24209441824820904,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2846.0,
"completions/max_terminated_length": 2846.0,
"completions/mean_length": 622.203125,
"completions/mean_terminated_length": 622.203125,
"completions/min_length": 200.0,
"completions/min_terminated_length": 200.0,
"epoch": 0.112,
"grad_norm": 0.9290190935134888,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.0393,
"num_tokens": 22853311.0,
"reward": 1.1226959228515625,
"reward_std": 0.3886973261833191,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.6314086318016052,
"rewards/format_reward_step": 0.953125,
"step": 105
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 7.141134875932043e-07,
"aux_brier/mean_group_std": 0.11222350522736028,
"aux_brier/mean_r": 0.7831322035965229,
"aux_brier/n_active_tok": 287.875,
"aux_brier/n_groups": 13.4375,
"aux_brier/n_step_records": 71.96875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5274049918474852,
"calib/avg_num_step_conf": 9.328125,
"calib/ece": 0.20736363636363642,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.08695652173913043,
"calib/gap": 0.03241063589614945,
"calib/mean_conf": 0.6360355731225296,
"calib/mu_c": 0.6532016806722689,
"calib/mu_w": 0.6207910447761195,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18652173913043485,
"calib/std_conf": 0.21853195676115036,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2327.0,
"completions/max_terminated_length": 2327.0,
"completions/mean_length": 548.98046875,
"completions/mean_terminated_length": 551.1333618164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.5100629925727844,
"learning_rate": 2.6111111111111113e-06,
"loss": 0.0564,
"num_tokens": 23098434.0,
"reward": 1.1276081800460815,
"reward_std": 0.3030830919742584,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.6823076009750366,
"rewards/format_reward_step": 0.984375,
"step": 106
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.481459785459329e-07,
"aux_brier/mean_group_std": 0.11460418211394487,
"aux_brier/mean_r": 0.7763244199315831,
"aux_brier/n_active_tok": 300.875,
"aux_brier/n_groups": 15.53125,
"aux_brier/n_step_records": 75.21875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5704158790170133,
"calib/avg_num_step_conf": 9.7890625,
"calib/ece": 0.1571608695652174,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.13438735177865613,
"calib/gap": 0.046667391304347716,
"calib/mean_conf": 0.6392027667984189,
"calib/mu_c": 0.6604152173913044,
"calib/mu_w": 0.6137478260869567,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.12545454545454543,
"calib/std_conf": 0.2236818974731012,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2471.0,
"completions/max_terminated_length": 2471.0,
"completions/mean_length": 563.6171875,
"completions/mean_terminated_length": 563.6171875,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.5186272263526917,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.0295,
"num_tokens": 23347336.0,
"reward": 1.210200548171997,
"reward_std": 0.32997098565101624,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.707989513874054,
"rewards/format_reward_step": 0.98828125,
"step": 107
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 9.08463159421391e-08,
"aux_brier/mean_group_std": 0.128515429104101,
"aux_brier/mean_r": 0.7434579599357282,
"aux_brier/n_active_tok": 313.875,
"aux_brier/n_groups": 14.8125,
"aux_brier/n_step_records": 78.46875,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5340425531914893,
"calib/avg_num_step_conf": 10.25,
"calib/ece": 0.20487831325301203,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.19678714859437751,
"calib/gap": 0.008639299931366029,
"calib/mean_conf": 0.6878927710843372,
"calib/mu_c": 0.6911541935483873,
"calib/mu_w": 0.6825148936170212,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.13514056224899598,
"calib/std_conf": 0.24153244676297436,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2523.0,
"completions/max_terminated_length": 2523.0,
"completions/mean_length": 610.79296875,
"completions/mean_terminated_length": 613.1882934570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.1152,
"grad_norm": 0.39098060131073,
"learning_rate": 2.5555555555555557e-06,
"loss": -0.0083,
"num_tokens": 23606931.0,
"reward": 1.2635798454284668,
"reward_std": 0.39778855443000793,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.6871317625045776,
"rewards/format_reward_step": 0.97265625,
"step": 108
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.051111352822769e-07,
"aux_brier/mean_group_std": 0.1244642150956407,
"aux_brier/mean_r": 0.7541600910765421,
"aux_brier/n_active_tok": 332.125,
"aux_brier/n_groups": 17.78125,
"aux_brier/n_step_records": 83.03125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5911488117931123,
"calib/avg_num_step_conf": 11.25,
"calib/ece": 0.21940796812749003,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.16733067729083664,
"calib/gap": 0.07328400050832384,
"calib/mean_conf": 0.6489984063745019,
"calib/mu_c": 0.6866622950819672,
"calib/mu_w": 0.6133782945736433,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.19117529880478085,
"calib/std_conf": 0.24650619929786974,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2683.0,
"completions/max_terminated_length": 2683.0,
"completions/mean_length": 621.921875,
"completions/mean_terminated_length": 626.8189086914062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 220.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.15936784446239471,
"learning_rate": 2.5277777777777778e-06,
"loss": 0.0116,
"num_tokens": 23870743.0,
"reward": 1.1357372999191284,
"reward_std": 0.2178267389535904,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.6835745573043823,
"rewards/format_reward_step": 0.9765625,
"step": 109
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.5052846808779634e-07,
"aux_brier/mean_group_std": 0.10248965500909708,
"aux_brier/mean_r": 0.7875876710416372,
"aux_brier/n_active_tok": 290.625,
"aux_brier/n_groups": 15.40625,
"aux_brier/n_step_records": 72.65625,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4687301587301588,
"calib/avg_num_step_conf": 9.5,
"calib/ece": 0.2398446215139442,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.11952191235059761,
"calib/gap": -0.019774412698412625,
"calib/mean_conf": 0.6366494023904384,
"calib/mu_c": 0.6268015873015873,
"calib/mu_w": 0.6465759999999999,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18725099601593626,
"calib/std_conf": 0.21088690675307917,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1493.0,
"completions/max_terminated_length": 1493.0,
"completions/mean_length": 536.26953125,
"completions/mean_terminated_length": 542.6284790039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.22146645188331604,
"learning_rate": 2.5e-06,
"loss": -0.0046,
"num_tokens": 24112948.0,
"reward": 1.1484915018081665,
"reward_std": 0.37460654973983765,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.6642783880233765,
"rewards/format_reward_step": 0.98046875,
"step": 110
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.2171949545503047e-07,
"aux_brier/mean_group_std": 0.1267809938259536,
"aux_brier/mean_r": 0.7636379715682697,
"aux_brier/n_active_tok": 313.875,
"aux_brier/n_groups": 15.34375,
"aux_brier/n_step_records": 78.46875,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.5169160487288136,
"calib/avg_num_step_conf": 10.9140625,
"calib/ece": 0.2302670731707318,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.15853658536585366,
"calib/gap": 0.019075105932203407,
"calib/mean_conf": 0.6572939024390244,
"calib/mu_c": 0.66644375,
"calib/mu_w": 0.6473686440677966,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18361788617886188,
"calib/std_conf": 0.2449877352235704,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3012.0,
"completions/max_terminated_length": 3012.0,
"completions/mean_length": 628.02734375,
"completions/mean_terminated_length": 632.972412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.0,
"epoch": 0.1184,
"grad_norm": 0.18761365115642548,
"learning_rate": 2.4722222222222226e-06,
"loss": 0.0441,
"num_tokens": 24381131.0,
"reward": 1.141373872756958,
"reward_std": 0.3451157510280609,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.6514326333999634,
"rewards/format_reward_step": 0.95703125,
"step": 111
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.043227629311417e-07,
"aux_brier/mean_group_std": 0.13559656376496837,
"aux_brier/mean_r": 0.7448649676055553,
"aux_brier/n_active_tok": 318.0,
"aux_brier/n_groups": 14.78125,
"aux_brier/n_step_records": 79.5,
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.5378966692892735,
"calib/avg_num_step_conf": 10.78125,
"calib/ece": 0.2578825910931175,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.1902834008097166,
"calib/gap": 0.036944105691056994,
"calib/mean_conf": 0.7025222672064778,
"calib/mu_c": 0.721069105691057,
"calib/mu_w": 0.684125,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23121457489878552,
"calib/std_conf": 0.2355327592464806,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2640.0,
"completions/max_terminated_length": 2640.0,
"completions/mean_length": 619.5546875,
"completions/mean_terminated_length": 634.4240112304688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 212.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.6164528131484985,
"learning_rate": 2.4444444444444447e-06,
"loss": -0.0143,
"num_tokens": 24647657.0,
"reward": 1.1225578784942627,
"reward_std": 0.3095180094242096,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.6464813947677612,
"rewards/format_reward_step": 0.9609375,
"step": 112
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.451427569942325e-08,
"aux_brier/mean_group_std": 0.12511360426090087,
"aux_brier/mean_r": 0.7727980755519827,
"aux_brier/n_active_tok": 305.375,
"aux_brier/n_groups": 14.125,
"aux_brier/n_step_records": 76.34375,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5654392764857882,
"calib/avg_num_step_conf": 10.3515625,
"calib/ece": 0.20167550200803208,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.18072289156626506,
"calib/gap": 0.04605951550387588,
"calib/mean_conf": 0.6883646586345382,
"calib/mu_c": 0.7105620155038759,
"calib/mu_w": 0.6645025,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18598393574297184,
"calib/std_conf": 0.20727755705483025,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3038.0,
"completions/max_terminated_length": 3038.0,
"completions/mean_length": 571.96484375,
"completions/mean_terminated_length": 578.7470703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.04625099152326584,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.0577,
"num_tokens": 24899280.0,
"reward": 1.1564178466796875,
"reward_std": 0.36667174100875854,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.6803587675094604,
"rewards/format_reward_step": 0.96484375,
"step": 113
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.7431586325252795e-08,
"aux_brier/mean_group_std": 0.1271221744337986,
"aux_brier/mean_r": 0.7559673133237643,
"aux_brier/n_active_tok": 307.25,
"aux_brier/n_groups": 15.0625,
"aux_brier/n_step_records": 76.8125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.43421309872922775,
"calib/avg_num_step_conf": 10.01171875,
"calib/ece": 0.26276968503937015,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.2125984251968504,
"calib/gap": -0.053843369175627154,
"calib/mean_conf": 0.708253937007874,
"calib/mu_c": 0.6872677419354839,
"calib/mu_w": 0.741111111111111,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1803937007874016,
"calib/std_conf": 0.20985623004249734,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1478.0,
"completions/max_terminated_length": 1478.0,
"completions/mean_length": 555.5625,
"completions/mean_terminated_length": 557.7412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.1216,
"grad_norm": 0.29303163290023804,
"learning_rate": 2.388888888888889e-06,
"loss": -0.0013,
"num_tokens": 25146528.0,
"reward": 1.2709516286849976,
"reward_std": 0.2983520030975342,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.6775567531585693,
"rewards/format_reward_step": 0.9921875,
"step": 114
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.079227509639182e-07,
"aux_brier/mean_group_std": 0.12487037779668066,
"aux_brier/mean_r": 0.744679790723911,
"aux_brier/n_active_tok": 311.375,
"aux_brier/n_groups": 15.75,
"aux_brier/n_step_records": 77.84375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5327266922094509,
"calib/avg_num_step_conf": 10.03515625,
"calib/ece": 0.2260509960159363,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.17928286852589642,
"calib/gap": 0.022761749680715204,
"calib/mean_conf": 0.7084509960159362,
"calib/mu_c": 0.7189703703703704,
"calib/mu_w": 0.6962086206896552,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1983266932270917,
"calib/std_conf": 0.22390431567678465,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2628.0,
"completions/max_terminated_length": 2628.0,
"completions/mean_length": 572.94140625,
"completions/mean_terminated_length": 575.1882934570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.10321390628814697,
"learning_rate": 2.361111111111111e-06,
"loss": 0.0437,
"num_tokens": 25398465.0,
"reward": 1.1851181983947754,
"reward_std": 0.3440912663936615,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.6701600551605225,
"rewards/format_reward_step": 0.98046875,
"step": 115
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.879896527947402e-07,
"aux_brier/mean_group_std": 0.11414278409933216,
"aux_brier/mean_r": 0.728497607593749,
"aux_brier/n_active_tok": 309.125,
"aux_brier/n_groups": 16.5,
"aux_brier/n_step_records": 77.28125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.3849875851531165,
"calib/avg_num_step_conf": 10.7578125,
"calib/ece": 0.32947420634920643,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.24206349206349206,
"calib/gap": -0.10577497294200022,
"calib/mean_conf": 0.6637797619047618,
"calib/mu_c": 0.6163489208633095,
"calib/mu_w": 0.7221238938053097,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.22083333333333344,
"calib/std_conf": 0.27420488246311364,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2542.0,
"completions/max_terminated_length": 2542.0,
"completions/mean_length": 658.98046875,
"completions/mean_terminated_length": 658.98046875,
"completions/min_length": 128.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.03025030344724655,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0737,
"num_tokens": 25671684.0,
"reward": 1.1835989952087402,
"reward_std": 0.3307391405105591,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.60158371925354,
"rewards/format_reward_step": 0.98046875,
"step": 116
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.8606582269375949e-07,
"aux_brier/mean_group_std": 0.11647272754337376,
"aux_brier/mean_r": 0.7258551974674999,
"aux_brier/n_active_tok": 312.125,
"aux_brier/n_groups": 16.5,
"aux_brier/n_step_records": 78.03125,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.573046066252588,
"calib/avg_num_step_conf": 11.3125,
"calib/ece": 0.2797260000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.188,
"calib/gap": 0.07941563146997932,
"calib/mean_conf": 0.6972339999999999,
"calib/mu_c": 0.7410714285714286,
"calib/mu_w": 0.6616557971014493,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26448000000000005,
"calib/std_conf": 0.24325610422762262,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2502.0,
"completions/max_terminated_length": 2502.0,
"completions/mean_length": 594.1640625,
"completions/mean_terminated_length": 601.2095336914062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 229.0,
"epoch": 0.1248,
"grad_norm": 0.26157084107398987,
"learning_rate": 2.305555555555556e-06,
"loss": 0.0291,
"num_tokens": 25930390.0,
"reward": 1.0866261720657349,
"reward_std": 0.3682401478290558,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.6511922478675842,
"rewards/format_reward_step": 0.97265625,
"step": 117
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.868508039050482e-09,
"aux_brier/mean_group_std": 0.12191150511231218,
"aux_brier/mean_r": 0.7398058479360671,
"aux_brier/n_active_tok": 319.625,
"aux_brier/n_groups": 16.46875,
"aux_brier/n_step_records": 79.90625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.48419150858175247,
"calib/avg_num_step_conf": 11.28515625,
"calib/ece": 0.2560562248995984,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.21285140562248997,
"calib/gap": 0.00477439024390236,
"calib/mean_conf": 0.6751084337349398,
"calib/mu_c": 0.6775243902439024,
"calib/mu_w": 0.6727500000000001,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.21859437751004013,
"calib/std_conf": 0.24665382772640074,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2785.0,
"completions/max_terminated_length": 2785.0,
"completions/mean_length": 613.30859375,
"completions/mean_terminated_length": 618.1378173828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.03583335876464844,
"learning_rate": 2.277777777777778e-06,
"loss": 0.0085,
"num_tokens": 26191405.0,
"reward": 1.1269874572753906,
"reward_std": 0.2839047610759735,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.6407626271247864,
"rewards/format_reward_step": 0.97265625,
"step": 118
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -7.695697490373021e-08,
"aux_brier/mean_group_std": 0.12511687598469357,
"aux_brier/mean_r": 0.7263048152072278,
"aux_brier/n_active_tok": 328.25,
"aux_brier/n_groups": 18.40625,
"aux_brier/n_step_records": 82.0625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5402770483193277,
"calib/avg_num_step_conf": 11.046875,
"calib/ece": 0.25728629032258066,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.24193548387096775,
"calib/gap": 0.023152310924369646,
"calib/mean_conf": 0.7328749999999999,
"calib/mu_c": 0.7433308823529411,
"calib/mu_w": 0.7201785714285714,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2208870967741936,
"calib/std_conf": 0.22405841624703374,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2838.0,
"completions/max_terminated_length": 2838.0,
"completions/mean_length": 650.296875,
"completions/mean_terminated_length": 658.0079345703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.10443716496229172,
"learning_rate": 2.25e-06,
"loss": -0.0069,
"num_tokens": 26462945.0,
"reward": 1.184114933013916,
"reward_std": 0.3726288676261902,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/final_brier_reward_step": 0.658334493637085,
"rewards/format_reward_step": 0.96875,
"step": 119
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 7.4058681964750406e-09,
"aux_brier/mean_group_std": 0.13934014127984432,
"aux_brier/mean_r": 0.7520619817123914,
"aux_brier/n_active_tok": 306.75,
"aux_brier/n_groups": 14.9375,
"aux_brier/n_step_records": 76.6875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5577572765072765,
"calib/avg_num_step_conf": 10.25,
"calib/ece": 0.18874206349206357,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.1746031746031746,
"calib/gap": 0.029452182952182993,
"calib/mean_conf": 0.6981626984126985,
"calib/mu_c": 0.7103175675675676,
"calib/mu_w": 0.6808653846153846,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.14980158730158738,
"calib/std_conf": 0.22008662462482884,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2139.0,
"completions/max_terminated_length": 2139.0,
"completions/mean_length": 567.890625,
"completions/mean_terminated_length": 570.11767578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 222.0,
"epoch": 0.128,
"grad_norm": 0.5418168902397156,
"learning_rate": 2.222222222222222e-06,
"loss": -0.0014,
"num_tokens": 26715013.0,
"reward": 1.2453269958496094,
"reward_std": 0.3320023715496063,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.7000583410263062,
"rewards/format_reward_step": 0.984375,
"step": 120
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.724665293136642e-07,
"aux_brier/mean_group_std": 0.12009707681668952,
"aux_brier/mean_r": 0.7294469403023176,
"aux_brier/n_active_tok": 298.875,
"aux_brier/n_groups": 14.84375,
"aux_brier/n_step_records": 74.71875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5350601946721311,
"calib/avg_num_step_conf": 10.9140625,
"calib/ece": 0.28112000000000015,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.236,
"calib/gap": 0.028297899590164044,
"calib/mean_conf": 0.6958,
"calib/mu_c": 0.7096093750000001,
"calib/mu_w": 0.681311475409836,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23246000000000017,
"calib/std_conf": 0.26218945821676354,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2988.0,
"completions/max_terminated_length": 2988.0,
"completions/mean_length": 622.06640625,
"completions/mean_terminated_length": 626.9645385742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.0886792242527008,
"learning_rate": 2.1944444444444445e-06,
"loss": 0.0221,
"num_tokens": 26979318.0,
"reward": 1.149843454360962,
"reward_std": 0.3843112289905548,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.6462490558624268,
"rewards/format_reward_step": 0.9765625,
"step": 121
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.6662508107188074e-07,
"aux_brier/mean_group_std": 0.10055198484079061,
"aux_brier/mean_r": 0.7419666062335994,
"aux_brier/n_active_tok": 291.875,
"aux_brier/n_groups": 13.90625,
"aux_brier/n_step_records": 72.96875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.47350591243208695,
"calib/avg_num_step_conf": 9.6328125,
"calib/ece": 0.24050511811023628,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.27165354330708663,
"calib/gap": -0.0071516203259828215,
"calib/mean_conf": 0.7447704724409449,
"calib/mu_c": 0.7418140939597315,
"calib/mu_w": 0.7489657142857143,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.1993307086614174,
"calib/std_conf": 0.20916631186184603,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2165.0,
"completions/max_terminated_length": 2165.0,
"completions/mean_length": 575.5703125,
"completions/mean_terminated_length": 577.8275146484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 242.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.08715233951807022,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0025,
"num_tokens": 27234008.0,
"reward": 1.2456527948379517,
"reward_std": 0.2976229786872864,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.6779236793518066,
"rewards/format_reward_step": 0.98828125,
"step": 122
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.3778430255361194e-07,
"aux_brier/mean_group_std": 0.12481574598913961,
"aux_brier/mean_r": 0.7195557555260736,
"aux_brier/n_active_tok": 310.5,
"aux_brier/n_groups": 16.71875,
"aux_brier/n_step_records": 77.625,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.44057377049180335,
"calib/avg_num_step_conf": 10.64453125,
"calib/ece": 0.30958333333333343,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.2976190476190476,
"calib/gap": -0.018156998738965924,
"calib/mean_conf": 0.7349404761904761,
"calib/mu_c": 0.7255737704918033,
"calib/mu_w": 0.7437307692307692,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.28019841269841284,
"calib/std_conf": 0.23893502227512506,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2185.0,
"completions/max_terminated_length": 2185.0,
"completions/mean_length": 637.51953125,
"completions/mean_terminated_length": 642.5393676757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 209.0,
"epoch": 0.1312,
"grad_norm": 0.26524844765663147,
"learning_rate": 2.138888888888889e-06,
"loss": 0.0073,
"num_tokens": 27502501.0,
"reward": 1.1216198205947876,
"reward_std": 0.3030514419078827,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.6114792823791504,
"rewards/format_reward_step": 0.984375,
"step": 123
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.874448450431501e-08,
"aux_brier/mean_group_std": 0.12056207255091692,
"aux_brier/mean_r": 0.7286804917728428,
"aux_brier/n_active_tok": 296.375,
"aux_brier/n_groups": 13.6875,
"aux_brier/n_step_records": 74.09375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4627914723517655,
"calib/avg_num_step_conf": 10.0,
"calib/ece": 0.22895652173913045,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.3201581027667984,
"calib/gap": -0.028399733510992675,
"calib/mean_conf": 0.7627905138339921,
"calib/mu_c": 0.7521265822784811,
"calib/mu_w": 0.7805263157894737,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18362055335968383,
"calib/std_conf": 0.21598772012774484,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1863.0,
"completions/max_terminated_length": 1863.0,
"completions/mean_length": 579.78125,
"completions/mean_terminated_length": 584.346435546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 227.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.047126758843660355,
"learning_rate": 2.1111111111111114e-06,
"loss": -0.0101,
"num_tokens": 27757741.0,
"reward": 1.2809193134307861,
"reward_std": 0.2520931661128998,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.678365170955658,
"rewards/format_reward_step": 0.98828125,
"step": 124
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.2165703001417754e-07,
"aux_brier/mean_group_std": 0.0958769819730208,
"aux_brier/mean_r": 0.7294786087706503,
"aux_brier/n_active_tok": 281.5,
"aux_brier/n_groups": 13.75,
"aux_brier/n_step_records": 70.375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.45331276910224283,
"calib/avg_num_step_conf": 10.3125,
"calib/ece": 0.3159120000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.264,
"calib/gap": -0.021969924812030084,
"calib/mean_conf": 0.7176880000000001,
"calib/mu_c": 0.7060000000000001,
"calib/mu_w": 0.7279699248120302,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2828000000000001,
"calib/std_conf": 0.22902425778943156,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2454.0,
"completions/max_terminated_length": 2454.0,
"completions/mean_length": 613.84765625,
"completions/mean_terminated_length": 618.6810913085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.11766476184129715,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.0049,
"num_tokens": 28019694.0,
"reward": 1.101876974105835,
"reward_std": 0.34241873025894165,
"rewards/accuracy_reward_step": 0.4609375,
"rewards/final_brier_reward_step": 0.6106326580047607,
"rewards/format_reward_step": 0.9765625,
"step": 125
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -9.08536907928692e-08,
"aux_brier/mean_group_std": 0.12238830568909123,
"aux_brier/mean_r": 0.7182270958214281,
"aux_brier/n_active_tok": 295.125,
"aux_brier/n_groups": 14.6875,
"aux_brier/n_step_records": 73.78125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.476510496671787,
"calib/avg_num_step_conf": 9.7109375,
"calib/ece": 0.30528000000000005,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.308,
"calib/gap": 0.021370967741935365,
"calib/mean_conf": 0.7644000000000001,
"calib/mu_c": 0.7749999999999999,
"calib/mu_w": 0.7536290322580645,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2828400000000001,
"calib/std_conf": 0.22342569234535226,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2664.0,
"completions/max_terminated_length": 2664.0,
"completions/mean_length": 586.80859375,
"completions/mean_terminated_length": 591.4291381835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 210.0,
"epoch": 0.1344,
"grad_norm": 0.0369451642036438,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0068,
"num_tokens": 28275381.0,
"reward": 1.134810447692871,
"reward_std": 0.31612884998321533,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.6251792907714844,
"rewards/format_reward_step": 0.97265625,
"step": 126
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.7130914514718043e-07,
"aux_brier/mean_group_std": 0.1087601259225485,
"aux_brier/mean_r": 0.7359164889442309,
"aux_brier/n_active_tok": 298.75,
"aux_brier/n_groups": 14.1875,
"aux_brier/n_step_records": 74.6875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5479636591478696,
"calib/avg_num_step_conf": 9.796875,
"calib/ece": 0.25896561264822143,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.2924901185770751,
"calib/gap": 0.057599429824561454,
"calib/mean_conf": 0.7517853754940711,
"calib/mu_c": 0.7791052631578949,
"calib/mu_w": 0.7215058333333334,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.24252964426877477,
"calib/std_conf": 0.22943804037496499,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2318.0,
"completions/max_terminated_length": 2318.0,
"completions/mean_length": 534.515625,
"completions/mean_terminated_length": 538.7244262695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 162.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.38190189003944397,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0133,
"num_tokens": 28515889.0,
"reward": 1.1805983781814575,
"reward_std": 0.32408082485198975,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.6677061319351196,
"rewards/format_reward_step": 0.98828125,
"step": 127
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 5.6091981129324786e-08,
"aux_brier/mean_group_std": 0.08898131700184603,
"aux_brier/mean_r": 0.7498846271760894,
"aux_brier/n_active_tok": 279.125,
"aux_brier/n_groups": 14.71875,
"aux_brier/n_step_records": 69.78125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5320246593121349,
"calib/avg_num_step_conf": 9.08203125,
"calib/ece": 0.28488755020080325,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.26104417670682734,
"calib/gap": 0.03906696950032451,
"calib/mean_conf": 0.7101325301204818,
"calib/mu_c": 0.7311565217391306,
"calib/mu_w": 0.6920895522388061,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26658634538152615,
"calib/std_conf": 0.23951175761152257,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2806.0,
"completions/max_terminated_length": 2806.0,
"completions/mean_length": 557.421875,
"completions/mean_terminated_length": 564.0316162109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.08702600747346878,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0082,
"num_tokens": 28765253.0,
"reward": 1.0940568447113037,
"reward_std": 0.38162872195243835,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.634039580821991,
"rewards/format_reward_step": 0.97265625,
"step": 128
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.6765578878550365e-07,
"aux_brier/mean_group_std": 0.11354217886692185,
"aux_brier/mean_r": 0.7451862984720397,
"aux_brier/n_active_tok": 273.5,
"aux_brier/n_groups": 13.1875,
"aux_brier/n_step_records": 68.375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5955418034857287,
"calib/avg_num_step_conf": 8.9921875,
"calib/ece": 0.1803921568627452,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.2901960784313726,
"calib/gap": 0.0871558474362214,
"calib/mean_conf": 0.7545098039215685,
"calib/mu_c": 0.7910810810810811,
"calib/mu_w": 0.7039252336448597,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.17725490196078442,
"calib/std_conf": 0.20507974274480467,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2692.0,
"completions/max_terminated_length": 2692.0,
"completions/mean_length": 536.171875,
"completions/mean_terminated_length": 536.171875,
"completions/min_length": 186.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.1376,
"grad_norm": 0.7112810611724854,
"learning_rate": 1.9722222222222224e-06,
"loss": 0.0176,
"num_tokens": 29004897.0,
"reward": 1.2570972442626953,
"reward_std": 0.23593538999557495,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.723701536655426,
"rewards/format_reward_step": 0.99609375,
"step": 129
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.0008908032932418e-07,
"aux_brier/mean_group_std": 0.1152837137003091,
"aux_brier/mean_r": 0.7432447140400249,
"aux_brier/n_active_tok": 275.625,
"aux_brier/n_groups": 12.84375,
"aux_brier/n_step_records": 68.90625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.57125,
"calib/avg_num_step_conf": 9.0625,
"calib/ece": 0.22098425196850408,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.25984251968503935,
"calib/gap": 0.06677948717948723,
"calib/mean_conf": 0.7105905511811024,
"calib/mu_c": 0.7379333333333333,
"calib/mu_w": 0.6711538461538461,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.17051181102362217,
"calib/std_conf": 0.24288466056117392,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1883.0,
"completions/max_terminated_length": 1883.0,
"completions/mean_length": 515.00390625,
"completions/mean_terminated_length": 519.05908203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.07466782629489899,
"learning_rate": 1.944444444444445e-06,
"loss": -0.0274,
"num_tokens": 29242026.0,
"reward": 1.2599036693572998,
"reward_std": 0.22464410960674286,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.7114894390106201,
"rewards/format_reward_step": 0.9921875,
"step": 130
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -7.091396594938182e-08,
"aux_brier/mean_group_std": 0.08964348396018788,
"aux_brier/mean_r": 0.7485890254713838,
"aux_brier/n_active_tok": 265.5,
"aux_brier/n_groups": 12.78125,
"aux_brier/n_step_records": 66.375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5577432790609618,
"calib/avg_num_step_conf": 8.72265625,
"calib/ece": 0.31573122529644276,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.25691699604743085,
"calib/gap": 0.036804871891960134,
"calib/mean_conf": 0.7388142292490117,
"calib/mu_c": 0.7590350877192982,
"calib/mu_w": 0.722230215827338,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30197628458498027,
"calib/std_conf": 0.21122309234134398,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2725.0,
"completions/max_terminated_length": 2725.0,
"completions/mean_length": 518.0078125,
"completions/mean_terminated_length": 520.0392456054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.29209235310554504,
"learning_rate": 1.916666666666667e-06,
"loss": 0.007,
"num_tokens": 29480844.0,
"reward": 1.098313570022583,
"reward_std": 0.2606309950351715,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.6354421973228455,
"rewards/format_reward_step": 0.98828125,
"step": 131
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 5.495046869613551e-08,
"aux_brier/mean_group_std": 0.1333929284300119,
"aux_brier/mean_r": 0.7491438215212559,
"aux_brier/n_active_tok": 280.25,
"aux_brier/n_groups": 13.1875,
"aux_brier/n_step_records": 70.0625,
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4191548582995951,
"calib/avg_num_step_conf": 9.390625,
"calib/ece": 0.27708593750000016,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.265625,
"calib/gap": -0.04846457489878542,
"calib/mean_conf": 0.7235703124999999,
"calib/mu_c": 0.7038815789473685,
"calib/mu_w": 0.7523461538461539,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.20345312500000015,
"calib/std_conf": 0.23251153142081435,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1347.0,
"completions/max_terminated_length": 1347.0,
"completions/mean_length": 530.34375,
"completions/mean_terminated_length": 532.423583984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.1408,
"grad_norm": 0.0908280611038208,
"learning_rate": 1.888888888888889e-06,
"loss": 0.0276,
"num_tokens": 29722204.0,
"reward": 1.2570316791534424,
"reward_std": 0.328461229801178,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.66093909740448,
"rewards/format_reward_step": 0.99609375,
"step": 132
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 5.310082064335919e-08,
"aux_brier/mean_group_std": 0.12002071878245187,
"aux_brier/mean_r": 0.7273467181940331,
"aux_brier/n_active_tok": 301.75,
"aux_brier/n_groups": 15.1875,
"aux_brier/n_step_records": 75.4375,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.49354471871028166,
"calib/avg_num_step_conf": 10.14453125,
"calib/ece": 0.3808180000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.268,
"calib/gap": 0.0077543982875109,
"calib/mean_conf": 0.7183820000000001,
"calib/mu_c": 0.7230656565656566,
"calib/mu_w": 0.7153112582781457,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.35160000000000013,
"calib/std_conf": 0.24897752323452813,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3009.0,
"completions/max_terminated_length": 3009.0,
"completions/mean_length": 618.140625,
"completions/mean_terminated_length": 625.4703979492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 170.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.03698096051812172,
"learning_rate": 1.8611111111111113e-06,
"loss": 0.0422,
"num_tokens": 29986792.0,
"reward": 1.0185657739639282,
"reward_std": 0.3775428831577301,
"rewards/accuracy_reward_step": 0.38671875,
"rewards/final_brier_reward_step": 0.5820757150650024,
"rewards/format_reward_step": 0.97265625,
"step": 133
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.5884086419256676e-08,
"aux_brier/mean_group_std": 0.11787864586886822,
"aux_brier/mean_r": 0.7481217165348084,
"aux_brier/n_active_tok": 291.875,
"aux_brier/n_groups": 15.9375,
"aux_brier/n_step_records": 72.96875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5220199092741935,
"calib/avg_num_step_conf": 9.83203125,
"calib/ece": 0.2597936507936509,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.2261904761904762,
"calib/gap": 0.026626008064516204,
"calib/mean_conf": 0.7144920634920635,
"calib/mu_c": 0.72759375,
"calib/mu_w": 0.7009677419354838,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.23317460317460328,
"calib/std_conf": 0.2360187222205034,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1992.0,
"completions/max_terminated_length": 1992.0,
"completions/mean_length": 605.64453125,
"completions/mean_terminated_length": 608.0196533203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 225.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.12758532166481018,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0279,
"num_tokens": 30250789.0,
"reward": 1.1529327630996704,
"reward_std": 0.363974004983902,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.6507935523986816,
"rewards/format_reward_step": 0.98046875,
"step": 134
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.978052592363724e-08,
"aux_brier/mean_group_std": 0.09441600157997393,
"aux_brier/mean_r": 0.7468353627329324,
"aux_brier/n_active_tok": 289.5,
"aux_brier/n_groups": 15.34375,
"aux_brier/n_step_records": 72.375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5035014005602241,
"calib/avg_num_step_conf": 9.6796875,
"calib/ece": 0.20294820717131482,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.2549800796812749,
"calib/gap": 0.009689875950379845,
"calib/mean_conf": 0.7113147410358567,
"calib/mu_c": 0.7150980392156863,
"calib/mu_w": 0.7054081632653064,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.15235059760956182,
"calib/std_conf": 0.23321472532710083,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2677.0,
"completions/max_terminated_length": 2677.0,
"completions/mean_length": 552.80859375,
"completions/mean_terminated_length": 557.1614379882812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.144,
"grad_norm": 0.08034074306488037,
"learning_rate": 1.8055555555555557e-06,
"loss": 0.0111,
"num_tokens": 30498188.0,
"reward": 1.2638380527496338,
"reward_std": 0.3435894846916199,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.6881648302078247,
"rewards/format_reward_step": 0.98046875,
"step": 135
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.437431097847888e-07,
"aux_brier/mean_group_std": 0.13127980531918174,
"aux_brier/mean_r": 0.7838554624555897,
"aux_brier/n_active_tok": 298.0,
"aux_brier/n_groups": 15.5,
"aux_brier/n_step_records": 74.5,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5721626733921816,
"calib/avg_num_step_conf": 9.796875,
"calib/ece": 0.23940277777777777,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.15873015873015872,
"calib/gap": 0.058459331651954716,
"calib/mean_conf": 0.7009940476190475,
"calib/mu_c": 0.7311516393442623,
"calib/mu_w": 0.6726923076923076,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.22813492063492063,
"calib/std_conf": 0.21794277355290823,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2238.0,
"completions/max_terminated_length": 2238.0,
"completions/mean_length": 556.41796875,
"completions/mean_terminated_length": 560.7991943359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 183.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.33265256881713867,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0139,
"num_tokens": 30749119.0,
"reward": 1.1373050212860107,
"reward_std": 0.3235567808151245,
"rewards/accuracy_reward_step": 0.4765625,
"rewards/final_brier_reward_step": 0.6742199063301086,
"rewards/format_reward_step": 0.984375,
"step": 136
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.116071700144275e-07,
"aux_brier/mean_group_std": 0.12785028119366598,
"aux_brier/mean_r": 0.7478586144411333,
"aux_brier/n_active_tok": 297.375,
"aux_brier/n_groups": 15.6875,
"aux_brier/n_step_records": 74.34375,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5757954036233275,
"calib/avg_num_step_conf": 9.64453125,
"calib/ece": 0.28439000000000003,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.256,
"calib/gap": 0.05277238973177134,
"calib/mean_conf": 0.74691,
"calib/mu_c": 0.7728740157480315,
"calib/mu_w": 0.7201016260162602,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26165000000000005,
"calib/std_conf": 0.21356913845403788,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2983.0,
"completions/max_terminated_length": 2983.0,
"completions/mean_length": 576.25390625,
"completions/mean_terminated_length": 576.25390625,
"completions/min_length": 188.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.11480996757745743,
"learning_rate": 1.75e-06,
"loss": 0.0637,
"num_tokens": 31003624.0,
"reward": 1.1488656997680664,
"reward_std": 0.3167296051979065,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.6579625606536865,
"rewards/format_reward_step": 0.9765625,
"step": 137
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.1831648958904317e-06,
"aux_brier/mean_group_std": 0.10604086520736114,
"aux_brier/mean_r": 0.7751440810207609,
"aux_brier/n_active_tok": 285.75,
"aux_brier/n_groups": 14.34375,
"aux_brier/n_step_records": 71.4375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.458918774253981,
"calib/avg_num_step_conf": 9.828125,
"calib/ece": 0.21845849802371553,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.233201581027668,
"calib/gap": -0.028429680181988326,
"calib/mean_conf": 0.6979841897233201,
"calib/mu_c": 0.6874213836477988,
"calib/mu_w": 0.7158510638297871,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1439920948616602,
"calib/std_conf": 0.22807903713595687,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2865.0,
"completions/max_terminated_length": 2865.0,
"completions/mean_length": 535.0078125,
"completions/mean_terminated_length": 539.220458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.0,
"epoch": 0.1472,
"grad_norm": 0.6342697143554688,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0026,
"num_tokens": 31244922.0,
"reward": 1.2872869968414307,
"reward_std": 0.3326365351676941,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.6882105469703674,
"rewards/format_reward_step": 0.98828125,
"step": 138
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.865244447593842e-08,
"aux_brier/mean_group_std": 0.11965349470914817,
"aux_brier/mean_r": 0.7836175999484832,
"aux_brier/n_active_tok": 283.75,
"aux_brier/n_groups": 14.625,
"aux_brier/n_step_records": 70.9375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5152098575279168,
"calib/avg_num_step_conf": 9.21484375,
"calib/ece": 0.2117391304347826,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.21739130434782608,
"calib/gap": 0.010970992170453275,
"calib/mean_conf": 0.6911857707509882,
"calib/mu_c": 0.6957823129251701,
"calib/mu_w": 0.6848113207547168,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.16094861660079052,
"calib/std_conf": 0.22697568490183898,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1989.0,
"completions/max_terminated_length": 1989.0,
"completions/mean_length": 524.66015625,
"completions/mean_terminated_length": 526.7176513671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.03418288379907608,
"learning_rate": 1.6944444444444446e-06,
"loss": 0.0167,
"num_tokens": 31482331.0,
"reward": 1.2388288974761963,
"reward_std": 0.2992714047431946,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/final_brier_reward_step": 0.6896905899047852,
"rewards/format_reward_step": 0.984375,
"step": 139
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.343148929941698e-07,
"aux_brier/mean_group_std": 0.102011587847724,
"aux_brier/mean_r": 0.7706437269626397,
"aux_brier/n_active_tok": 267.125,
"aux_brier/n_groups": 12.625,
"aux_brier/n_step_records": 66.78125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4567003132613992,
"calib/avg_num_step_conf": 8.57421875,
"calib/ece": 0.22116732283464563,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.2795275590551181,
"calib/gap": -0.036213191785590015,
"calib/mean_conf": 0.7223759842519686,
"calib/mu_c": 0.7102573964497042,
"calib/mu_w": 0.7464705882352942,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.13909448818897635,
"calib/std_conf": 0.21748027644758106,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1719.0,
"completions/max_terminated_length": 1719.0,
"completions/mean_length": 539.32421875,
"completions/mean_terminated_length": 541.4392700195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.35845911502838135,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0268,
"num_tokens": 31725414.0,
"reward": 1.330304503440857,
"reward_std": 0.30226558446884155,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/final_brier_reward_step": 0.7040305137634277,
"rewards/format_reward_step": 0.98828125,
"step": 140
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.3600390038337462e-06,
"aux_brier/mean_group_std": 0.104313854179082,
"aux_brier/mean_r": 0.7551585907608853,
"aux_brier/n_active_tok": 273.75,
"aux_brier/n_groups": 13.25,
"aux_brier/n_step_records": 68.4375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.633959101575595,
"calib/avg_num_step_conf": 9.09375,
"calib/ece": 0.14115079365079367,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.23412698412698413,
"calib/gap": 0.11824404961448198,
"calib/mean_conf": 0.6844047619047618,
"calib/mu_c": 0.7289808917197452,
"calib/mu_w": 0.6107368421052632,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1012698412698413,
"calib/std_conf": 0.24850194590823912,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2951.0,
"completions/max_terminated_length": 2951.0,
"completions/mean_length": 594.125,
"completions/mean_terminated_length": 601.1699829101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.1504,
"grad_norm": 0.1141957938671112,
"learning_rate": 1.638888888888889e-06,
"loss": -0.0129,
"num_tokens": 31984606.0,
"reward": 1.2913074493408203,
"reward_std": 0.31009572744369507,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.7433550953865051,
"rewards/format_reward_step": 0.984375,
"step": 141
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.3187952138958856e-07,
"aux_brier/mean_group_std": 0.10804659931268239,
"aux_brier/mean_r": 0.7643841751146132,
"aux_brier/n_active_tok": 289.5,
"aux_brier/n_groups": 14.09375,
"aux_brier/n_step_records": 72.375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.47524177949709856,
"calib/avg_num_step_conf": 9.48046875,
"calib/ece": 0.28179282868525896,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.250996015936255,
"calib/gap": -0.012043197936815075,
"calib/mean_conf": 0.7076892430278884,
"calib/mu_c": 0.7024113475177305,
"calib/mu_w": 0.7144545454545456,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.21386454183266934,
"calib/std_conf": 0.24098618565618288,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2197.0,
"completions/max_terminated_length": 2197.0,
"completions/mean_length": 557.15625,
"completions/mean_terminated_length": 561.5433349609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.2936396896839142,
"learning_rate": 1.6111111111111113e-06,
"loss": -0.011,
"num_tokens": 32232398.0,
"reward": 1.2048792839050293,
"reward_std": 0.22850966453552246,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.6554551124572754,
"rewards/format_reward_step": 0.98046875,
"step": 142
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.5166282119061236e-09,
"aux_brier/mean_group_std": 0.12278076633034357,
"aux_brier/mean_r": 0.7845885686918515,
"aux_brier/n_active_tok": 294.25,
"aux_brier/n_groups": 17.0,
"aux_brier/n_step_records": 73.5625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5487562189054727,
"calib/avg_num_step_conf": 9.4609375,
"calib/ece": 0.2040354330708662,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.18503937007874016,
"calib/gap": 0.05270211442786088,
"calib/mean_conf": 0.6980118110236221,
"calib/mu_c": 0.7229104477611941,
"calib/mu_w": 0.6702083333333332,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18724409448818904,
"calib/std_conf": 0.22331892526202243,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2741.0,
"completions/max_terminated_length": 2741.0,
"completions/mean_length": 560.63671875,
"completions/mean_terminated_length": 562.8353271484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 201.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.1971731185913086,
"learning_rate": 1.5833333333333333e-06,
"loss": 0.0093,
"num_tokens": 32483257.0,
"reward": 1.1926939487457275,
"reward_std": 0.2808946371078491,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.6926510334014893,
"rewards/format_reward_step": 0.9921875,
"step": 143
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.047745739086615e-07,
"aux_brier/mean_group_std": 0.11349697755479779,
"aux_brier/mean_r": 0.751951976930463,
"aux_brier/n_active_tok": 288.75,
"aux_brier/n_groups": 14.5625,
"aux_brier/n_step_records": 72.1875,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5052215825411701,
"calib/avg_num_step_conf": 9.7109375,
"calib/ece": 0.23928286852589642,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.2749003984063745,
"calib/gap": 0.001300709599678762,
"calib/mean_conf": 0.7060557768924304,
"calib/mu_c": 0.7065584415584416,
"calib/mu_w": 0.7052577319587628,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.16589641434262947,
"calib/std_conf": 0.24177296006711663,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2479.0,
"completions/max_terminated_length": 2479.0,
"completions/mean_length": 558.18359375,
"completions/mean_terminated_length": 564.8023681640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.1536,
"grad_norm": 0.10048002749681473,
"learning_rate": 1.5555555555555558e-06,
"loss": -0.0182,
"num_tokens": 32730280.0,
"reward": 1.2574422359466553,
"reward_std": 0.3413987457752228,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.678206205368042,
"rewards/format_reward_step": 0.97265625,
"step": 144
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.063732889052595e-07,
"aux_brier/mean_group_std": 0.13482206581994385,
"aux_brier/mean_r": 0.728899507439789,
"aux_brier/n_active_tok": 302.125,
"aux_brier/n_groups": 15.34375,
"aux_brier/n_step_records": 75.53125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4406591884780502,
"calib/avg_num_step_conf": 9.7890625,
"calib/ece": 0.23324110671936765,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.2924901185770751,
"calib/gap": -0.03391012325162723,
"calib/mean_conf": 0.7278656126482212,
"calib/mu_c": 0.7162048192771084,
"calib/mu_w": 0.7501149425287357,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.15249011857707517,
"calib/std_conf": 0.23770183793614533,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1298.0,
"completions/max_terminated_length": 1298.0,
"completions/mean_length": 535.14453125,
"completions/mean_terminated_length": 539.3582763671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.17978081107139587,
"learning_rate": 1.527777777777778e-06,
"loss": -0.0044,
"num_tokens": 32969981.0,
"reward": 1.3148910999298096,
"reward_std": 0.3562201261520386,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/final_brier_reward_step": 0.6892519593238831,
"rewards/format_reward_step": 0.98828125,
"step": 145
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.7008681346686316e-07,
"aux_brier/mean_group_std": 0.12852282145696792,
"aux_brier/mean_r": 0.7500887148502667,
"aux_brier/n_active_tok": 320.0,
"aux_brier/n_groups": 16.71875,
"aux_brier/n_step_records": 80.0,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4882333978078659,
"calib/avg_num_step_conf": 10.546875,
"calib/ece": 0.33517928286852594,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.29880478087649404,
"calib/gap": -0.006728562217924172,
"calib/mean_conf": 0.7341434262948207,
"calib/mu_c": 0.7303636363636362,
"calib/mu_w": 0.7370921985815604,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.31553784860557776,
"calib/std_conf": 0.2305570150696401,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2851.0,
"completions/max_terminated_length": 2851.0,
"completions/mean_length": 589.796875,
"completions/mean_terminated_length": 596.79052734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.060811351984739304,
"learning_rate": 1.5e-06,
"loss": 0.0383,
"num_tokens": 33228185.0,
"reward": 1.0732979774475098,
"reward_std": 0.3476945459842682,
"rewards/accuracy_reward_step": 0.43359375,
"rewards/final_brier_reward_step": 0.5978792905807495,
"rewards/format_reward_step": 0.98046875,
"step": 146
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 8.878063501027711e-08,
"aux_brier/mean_group_std": 0.11758874771701373,
"aux_brier/mean_r": 0.7280103099137688,
"aux_brier/n_active_tok": 305.375,
"aux_brier/n_groups": 15.0625,
"aux_brier/n_step_records": 76.34375,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6231580919080919,
"calib/avg_num_step_conf": 9.875,
"calib/ece": 0.34956862745098044,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.3137254901960784,
"calib/gap": 0.08958479020979015,
"calib/mean_conf": 0.7591372549019608,
"calib/mu_c": 0.8093750000000001,
"calib/mu_w": 0.7197902097902099,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3347450980392157,
"calib/std_conf": 0.22486954994935443,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1712.0,
"completions/max_terminated_length": 1712.0,
"completions/mean_length": 577.875,
"completions/mean_terminated_length": 580.1412353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.0,
"epoch": 0.1568,
"grad_norm": 0.15195201337337494,
"learning_rate": 1.4722222222222225e-06,
"loss": 0.0478,
"num_tokens": 33479801.0,
"reward": 1.0961443185806274,
"reward_std": 0.326327919960022,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.6423898935317993,
"rewards/format_reward_step": 0.99609375,
"step": 147
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 9.185161645164364e-08,
"aux_brier/mean_group_std": 0.1443634725519226,
"aux_brier/mean_r": 0.7559351885308664,
"aux_brier/n_active_tok": 293.5,
"aux_brier/n_groups": 15.09375,
"aux_brier/n_step_records": 73.375,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5365727243278264,
"calib/avg_num_step_conf": 9.41015625,
"calib/ece": 0.1691071428571429,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.25,
"calib/gap": 0.03961224489795945,
"calib/mean_conf": 0.732202380952381,
"calib/mu_c": 0.7487074829931973,
"calib/mu_w": 0.7090952380952379,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1589880952380953,
"calib/std_conf": 0.2014777024821994,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3030.0,
"completions/max_terminated_length": 3030.0,
"completions/mean_length": 577.28515625,
"completions/mean_terminated_length": 579.549072265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.4152239263057709,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0645,
"num_tokens": 33732698.0,
"reward": 1.2391388416290283,
"reward_std": 0.3460821807384491,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/final_brier_reward_step": 0.6987428665161133,
"rewards/format_reward_step": 0.98046875,
"step": 148
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.499285150838528e-07,
"aux_brier/mean_group_std": 0.13527552986521668,
"aux_brier/mean_r": 0.7384624378093363,
"aux_brier/n_active_tok": 304.875,
"aux_brier/n_groups": 15.5625,
"aux_brier/n_step_records": 76.21875,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5407376199118484,
"calib/avg_num_step_conf": 10.30078125,
"calib/ece": 0.2522489959839358,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.285140562248996,
"calib/gap": 0.040521778584391765,
"calib/mean_conf": 0.7263855421686747,
"calib/mu_c": 0.7452631578947367,
"calib/mu_w": 0.704741379310345,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.22224899598393583,
"calib/std_conf": 0.2391530150557342,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3011.0,
"completions/max_terminated_length": 3011.0,
"completions/mean_length": 623.921875,
"completions/mean_terminated_length": 628.8346557617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 158.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.12097613513469696,
"learning_rate": 1.4166666666666667e-06,
"loss": 0.0581,
"num_tokens": 33996878.0,
"reward": 1.1659348011016846,
"reward_std": 0.3879719078540802,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.6559269428253174,
"rewards/format_reward_step": 0.96484375,
"step": 149
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.6660795848499887e-07,
"aux_brier/mean_group_std": 0.12032826164802157,
"aux_brier/mean_r": 0.7387447603773472,
"aux_brier/n_active_tok": 281.625,
"aux_brier/n_groups": 15.375,
"aux_brier/n_step_records": 70.40625,
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.5689479060265576,
"calib/avg_num_step_conf": 9.375,
"calib/ece": 0.14641975308641983,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.2551440329218107,
"calib/gap": 0.06222092514227351,
"calib/mean_conf": 0.7320164609053498,
"calib/mu_c": 0.7548051948051948,
"calib/mu_w": 0.6925842696629213,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.12234567901234573,
"calib/std_conf": 0.21270462850699287,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2751.0,
"completions/max_terminated_length": 2751.0,
"completions/mean_length": 546.6328125,
"completions/mean_terminated_length": 548.7764892578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.0,
"epoch": 0.16,
"grad_norm": 0.10970129072666168,
"learning_rate": 1.3888888888888892e-06,
"loss": 0.0125,
"num_tokens": 34241776.0,
"reward": 1.2561277151107788,
"reward_std": 0.34284496307373047,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.70419842004776,
"rewards/format_reward_step": 0.94921875,
"step": 150
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.444867650667874e-08,
"aux_brier/mean_group_std": 0.15244814157287057,
"aux_brier/mean_r": 0.7331580950992309,
"aux_brier/n_active_tok": 331.875,
"aux_brier/n_groups": 19.46875,
"aux_brier/n_step_records": 82.96875,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.500032535137949,
"calib/avg_num_step_conf": 10.9609375,
"calib/ece": 0.3265060240963856,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.2891566265060241,
"calib/gap": 0.009864003123372989,
"calib/mean_conf": 0.7433734939759037,
"calib/mu_c": 0.7487610619469025,
"calib/mu_w": 0.7388970588235295,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3080321285140563,
"calib/std_conf": 0.2085228817879649,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3070.0,
"completions/max_terminated_length": 3070.0,
"completions/mean_length": 641.99609375,
"completions/mean_terminated_length": 641.99609375,
"completions/min_length": 187.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.057767193764448166,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0648,
"num_tokens": 34513151.0,
"reward": 1.0808541774749756,
"reward_std": 0.4014917016029358,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/final_brier_reward_step": 0.6124788522720337,
"rewards/format_reward_step": 0.97265625,
"step": 151
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.6950521814228523e-09,
"aux_brier/mean_group_std": 0.1271265112493043,
"aux_brier/mean_r": 0.7831502268218242,
"aux_brier/n_active_tok": 295.875,
"aux_brier/n_groups": 15.28125,
"aux_brier/n_step_records": 73.96875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5979885057471265,
"calib/avg_num_step_conf": 9.65234375,
"calib/ece": 0.21739043824701193,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.19123505976095617,
"calib/gap": 0.08982503192848013,
"calib/mean_conf": 0.6775498007968127,
"calib/mu_c": 0.7258620689655172,
"calib/mu_w": 0.6360370370370371,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.21639442231075698,
"calib/std_conf": 0.2224305982230339,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2046.0,
"completions/max_terminated_length": 2046.0,
"completions/mean_length": 555.18359375,
"completions/mean_terminated_length": 563.99609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.15894092619419098,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0171,
"num_tokens": 34760670.0,
"reward": 1.1123087406158447,
"reward_std": 0.38148149847984314,
"rewards/accuracy_reward_step": 0.453125,
"rewards/final_brier_reward_step": 0.6836100816726685,
"rewards/format_reward_step": 0.9765625,
"step": 152
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.9415360288238537e-07,
"aux_brier/mean_group_std": 0.14774658666911658,
"aux_brier/mean_r": 0.7723663608067817,
"aux_brier/n_active_tok": 316.125,
"aux_brier/n_groups": 16.1875,
"aux_brier/n_step_records": 79.03125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.49931782744282754,
"calib/avg_num_step_conf": 10.30859375,
"calib/ece": 0.21603174603174613,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.26587301587301587,
"calib/gap": 0.0055275467775469656,
"calib/mean_conf": 0.7323809523809524,
"calib/mu_c": 0.7346621621621623,
"calib/mu_w": 0.7291346153846153,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18055555555555564,
"calib/std_conf": 0.214621958415288,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2199.0,
"completions/max_terminated_length": 2199.0,
"completions/mean_length": 584.890625,
"completions/mean_terminated_length": 589.4960327148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 212.0,
"epoch": 0.1632,
"grad_norm": 0.24331292510032654,
"learning_rate": 1.3055555555555556e-06,
"loss": -0.0241,
"num_tokens": 35017722.0,
"reward": 1.2409024238586426,
"reward_std": 0.28328534960746765,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.6823593974113464,
"rewards/format_reward_step": 0.984375,
"step": 153
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.731180373805042e-07,
"aux_brier/mean_group_std": 0.1388109483574081,
"aux_brier/mean_r": 0.7655757917974565,
"aux_brier/n_active_tok": 305.625,
"aux_brier/n_groups": 16.375,
"aux_brier/n_step_records": 76.40625,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6012010113780024,
"calib/avg_num_step_conf": 9.89453125,
"calib/ece": 0.3026450592885376,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.2648221343873518,
"calib/gap": 0.06418975979772445,
"calib/mean_conf": 0.7280268774703559,
"calib/mu_c": 0.7635469026548672,
"calib/mu_w": 0.6993571428571428,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2920158102766799,
"calib/std_conf": 0.21420561635032018,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2036.0,
"completions/max_terminated_length": 2036.0,
"completions/mean_length": 549.5859375,
"completions/mean_terminated_length": 553.9133911132812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.06564311683177948,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.0272,
"num_tokens": 35262856.0,
"reward": 1.098493218421936,
"reward_std": 0.3088676333427429,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/final_brier_reward_step": 0.6517854928970337,
"rewards/format_reward_step": 0.98828125,
"step": 154
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.4338358681208483e-07,
"aux_brier/mean_group_std": 0.12692619903557353,
"aux_brier/mean_r": 0.7818858282035643,
"aux_brier/n_active_tok": 279.25,
"aux_brier/n_groups": 13.6875,
"aux_brier/n_step_records": 69.8125,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5334280771131983,
"calib/avg_num_step_conf": 8.8125,
"calib/ece": 0.2544313725490196,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.21176470588235294,
"calib/gap": 0.028876050420168164,
"calib/mean_conf": 0.6697254901960785,
"calib/mu_c": 0.6851260504201682,
"calib/mu_w": 0.65625,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.22874509803921572,
"calib/std_conf": 0.22393347037495637,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1344.0,
"completions/max_terminated_length": 1344.0,
"completions/mean_length": 507.01953125,
"completions/mean_terminated_length": 509.00787353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.034964244812726974,
"learning_rate": 1.25e-06,
"loss": 0.0088,
"num_tokens": 35499869.0,
"reward": 1.1278388500213623,
"reward_std": 0.3299586772918701,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.667605459690094,
"rewards/format_reward_step": 0.9921875,
"step": 155
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.5496790095891342e-08,
"aux_brier/mean_group_std": 0.12602172269006534,
"aux_brier/mean_r": 0.7435304407654402,
"aux_brier/n_active_tok": 299.5,
"aux_brier/n_groups": 15.875,
"aux_brier/n_step_records": 74.875,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5492846788933006,
"calib/avg_num_step_conf": 9.71875,
"calib/ece": 0.2830555555555557,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.30158730158730157,
"calib/gap": 0.046642087351105976,
"calib/mean_conf": 0.7194047619047619,
"calib/mu_c": 0.7421705426356588,
"calib/mu_w": 0.6955284552845529,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2452777777777779,
"calib/std_conf": 0.24476268644492558,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2472.0,
"completions/max_terminated_length": 2472.0,
"completions/mean_length": 547.55078125,
"completions/mean_terminated_length": 556.2420654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.1664,
"grad_norm": 0.0664546936750412,
"learning_rate": 1.2222222222222223e-06,
"loss": -0.0023,
"num_tokens": 35744802.0,
"reward": 1.1587910652160645,
"reward_std": 0.30581116676330566,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.6586019396781921,
"rewards/format_reward_step": 0.98046875,
"step": 156
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.444379716109001e-07,
"aux_brier/mean_group_std": 0.14657826141004715,
"aux_brier/mean_r": 0.7612826136247497,
"aux_brier/n_active_tok": 306.625,
"aux_brier/n_groups": 15.25,
"aux_brier/n_step_records": 76.65625,
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5845720302242041,
"calib/avg_num_step_conf": 10.171875,
"calib/ece": 0.2230588235294118,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.24705882352941178,
"calib/gap": 0.0801096246748424,
"calib/mean_conf": 0.712156862745098,
"calib/mu_c": 0.7489130434782609,
"calib/mu_w": 0.6688034188034185,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1970196078431373,
"calib/std_conf": 0.22981931287546264,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1344.0,
"completions/max_terminated_length": 1344.0,
"completions/mean_length": 552.31640625,
"completions/mean_terminated_length": 554.4823608398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.05387997254729271,
"learning_rate": 1.1944444444444446e-06,
"loss": -0.0296,
"num_tokens": 35989923.0,
"reward": 1.213773488998413,
"reward_std": 0.28469476103782654,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.7066562175750732,
"rewards/format_reward_step": 0.99609375,
"step": 157
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.475260010402879e-07,
"aux_brier/mean_group_std": 0.12277746991169511,
"aux_brier/mean_r": 0.7717797308190276,
"aux_brier/n_active_tok": 301.375,
"aux_brier/n_groups": 15.84375,
"aux_brier/n_step_records": 75.34375,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5251703518286748,
"calib/avg_num_step_conf": 10.12109375,
"calib/ece": 0.21101214574898786,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.2388663967611336,
"calib/gap": 0.026731330830204514,
"calib/mean_conf": 0.6919838056680162,
"calib/mu_c": 0.702156862745098,
"calib/mu_w": 0.6754255319148935,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.14178137651821865,
"calib/std_conf": 0.23126240363034653,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2964.0,
"completions/max_terminated_length": 2964.0,
"completions/mean_length": 556.265625,
"completions/mean_terminated_length": 567.3466186523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.062464676797389984,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0426,
"num_tokens": 36237567.0,
"reward": 1.2532968521118164,
"reward_std": 0.32989680767059326,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.6928750276565552,
"rewards/format_reward_step": 0.96484375,
"step": 158
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.0174982468554887e-07,
"aux_brier/mean_group_std": 0.1261045529526276,
"aux_brier/mean_r": 0.7995295352517525,
"aux_brier/n_active_tok": 284.375,
"aux_brier/n_groups": 14.46875,
"aux_brier/n_step_records": 71.09375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4827797880589299,
"calib/avg_num_step_conf": 9.01953125,
"calib/ece": 0.20079365079365083,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.17857142857142858,
"calib/gap": -0.014422331351770623,
"calib/mean_conf": 0.6492857142857144,
"calib/mu_c": 0.6432191780821918,
"calib/mu_w": 0.6576415094339624,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.13535714285714293,
"calib/std_conf": 0.2180657341241236,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2918.0,
"completions/max_terminated_length": 2918.0,
"completions/mean_length": 518.59765625,
"completions/mean_terminated_length": 522.6810913085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 64.0,
"epoch": 0.1696,
"grad_norm": 0.03297615423798561,
"learning_rate": 1.138888888888889e-06,
"loss": 0.0159,
"num_tokens": 36475112.0,
"reward": 1.2339849472045898,
"reward_std": 0.29382210969924927,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.6859399080276489,
"rewards/format_reward_step": 0.984375,
"step": 159
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.561264022653113e-07,
"aux_brier/mean_group_std": 0.12557265117017866,
"aux_brier/mean_r": 0.7428592234899737,
"aux_brier/n_active_tok": 308.375,
"aux_brier/n_groups": 14.90625,
"aux_brier/n_step_records": 77.09375,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5450541152781273,
"calib/avg_num_step_conf": 9.72265625,
"calib/ece": 0.2701581027667984,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.3557312252964427,
"calib/gap": 0.052652907123080794,
"calib/mean_conf": 0.7464426877470356,
"calib/mu_c": 0.7705839416058393,
"calib/mu_w": 0.7179310344827585,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2375494071146245,
"calib/std_conf": 0.23149965143070703,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1949.0,
"completions/max_terminated_length": 1949.0,
"completions/mean_length": 552.23828125,
"completions/mean_terminated_length": 552.23828125,
"completions/min_length": 201.0,
"completions/min_terminated_length": 201.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.19734477996826172,
"learning_rate": 1.111111111111111e-06,
"loss": 0.0347,
"num_tokens": 36721325.0,
"reward": 1.197866678237915,
"reward_std": 0.2875242829322815,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/final_brier_reward_step": 0.6742792725563049,
"rewards/format_reward_step": 0.98828125,
"step": 160
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.4726692812726547e-07,
"aux_brier/mean_group_std": 0.12691666415635272,
"aux_brier/mean_r": 0.7496726792116155,
"aux_brier/n_active_tok": 279.375,
"aux_brier/n_groups": 13.1875,
"aux_brier/n_step_records": 69.84375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5176443275034824,
"calib/avg_num_step_conf": 9.2265625,
"calib/ece": 0.20284584980237164,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.28063241106719367,
"calib/gap": 0.019548057576226685,
"calib/mean_conf": 0.7101185770750988,
"calib/mu_c": 0.7156043956043957,
"calib/mu_w": 0.696056338028169,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0967984189723321,
"calib/std_conf": 0.24223185282794146,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2200.0,
"completions/max_terminated_length": 2200.0,
"completions/mean_length": 531.47265625,
"completions/mean_terminated_length": 533.556884765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.10202208906412125,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.0289,
"num_tokens": 36961302.0,
"reward": 1.3936083316802979,
"reward_std": 0.2275093048810959,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/final_brier_reward_step": 0.7384960651397705,
"rewards/format_reward_step": 0.98828125,
"step": 161
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.2315262043482988e-07,
"aux_brier/mean_group_std": 0.12375735595768081,
"aux_brier/mean_r": 0.7267927646386473,
"aux_brier/n_active_tok": 293.75,
"aux_brier/n_groups": 14.3125,
"aux_brier/n_step_records": 73.4375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4668095801301005,
"calib/avg_num_step_conf": 9.515625,
"calib/ece": 0.21763779527559055,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.31496062992125984,
"calib/gap": -0.01667356593731506,
"calib/mean_conf": 0.7501574803149607,
"calib/mu_c": 0.7451685393258428,
"calib/mu_w": 0.7618421052631579,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.133503937007874,
"calib/std_conf": 0.2269013599969254,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2190.0,
"completions/max_terminated_length": 2190.0,
"completions/mean_length": 535.77734375,
"completions/mean_terminated_length": 537.8784790039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.1728,
"grad_norm": 0.08193613588809967,
"learning_rate": 1.0555555555555557e-06,
"loss": -0.0006,
"num_tokens": 37202605.0,
"reward": 1.372331976890564,
"reward_std": 0.2566692531108856,
"rewards/accuracy_reward_step": 0.6953125,
"rewards/final_brier_reward_step": 0.723703145980835,
"rewards/format_reward_step": 0.9921875,
"step": 162
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.674164930049905e-07,
"aux_brier/mean_group_std": 0.13808864230548526,
"aux_brier/mean_r": 0.7291608496776432,
"aux_brier/n_active_tok": 314.25,
"aux_brier/n_groups": 16.5625,
"aux_brier/n_step_records": 78.5625,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5460588793922128,
"calib/avg_num_step_conf": 10.39453125,
"calib/ece": 0.26658730158730165,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.28174603174603174,
"calib/gap": 0.05227350427350419,
"calib/mean_conf": 0.7570634920634921,
"calib/mu_c": 0.7813333333333332,
"calib/mu_w": 0.729059829059829,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.24396825396825406,
"calib/std_conf": 0.21383522606265276,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2636.0,
"completions/max_terminated_length": 2636.0,
"completions/mean_length": 597.42578125,
"completions/mean_terminated_length": 599.7686767578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.14409030973911285,
"learning_rate": 1.0277777777777777e-06,
"loss": 0.0219,
"num_tokens": 37460378.0,
"reward": 1.187504529953003,
"reward_std": 0.31250619888305664,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.6718930006027222,
"rewards/format_reward_step": 0.984375,
"step": 163
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.507768979258088e-08,
"aux_brier/mean_group_std": 0.13708992804717865,
"aux_brier/mean_r": 0.7332738047370835,
"aux_brier/n_active_tok": 324.25,
"aux_brier/n_groups": 18.0,
"aux_brier/n_step_records": 81.0625,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5002587656876698,
"calib/avg_num_step_conf": 10.48828125,
"calib/ece": 0.24116465863453815,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.3253012048192771,
"calib/gap": 0.025842929227584577,
"calib/mean_conf": 0.7524096385542168,
"calib/mu_c": 0.7646564885496183,
"calib/mu_w": 0.7388135593220337,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23373493975903614,
"calib/std_conf": 0.21775741095903073,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2839.0,
"completions/max_terminated_length": 2839.0,
"completions/mean_length": 621.26171875,
"completions/mean_terminated_length": 626.153564453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 272.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.20858323574066162,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0054,
"num_tokens": 37725557.0,
"reward": 1.1577627658843994,
"reward_std": 0.38337188959121704,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.6466757655143738,
"rewards/format_reward_step": 0.96875,
"step": 164
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.474337839457167e-08,
"aux_brier/mean_group_std": 0.13658670486017807,
"aux_brier/mean_r": 0.7416463819840573,
"aux_brier/n_active_tok": 316.125,
"aux_brier/n_groups": 15.875,
"aux_brier/n_step_records": 79.03125,
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4655030149158997,
"calib/avg_num_step_conf": 10.32421875,
"calib/ece": 0.3632738095238095,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.2896825396825397,
"calib/gap": -0.024930815614090718,
"calib/mean_conf": 0.7630753968253969,
"calib/mu_c": 0.7495217391304347,
"calib/mu_w": 0.7744525547445255,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.33499999999999996,
"calib/std_conf": 0.20081423113522068,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2180.0,
"completions/max_terminated_length": 2180.0,
"completions/mean_length": 593.52734375,
"completions/mean_terminated_length": 593.52734375,
"completions/min_length": 243.0,
"completions/min_terminated_length": 243.0,
"epoch": 0.176,
"grad_norm": 0.3347119092941284,
"learning_rate": 9.722222222222224e-07,
"loss": 0.0236,
"num_tokens": 37983076.0,
"reward": 1.0855225324630737,
"reward_std": 0.38842636346817017,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.5920901298522949,
"rewards/format_reward_step": 0.9765625,
"step": 165
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -2.660470514170843e-07,
"aux_brier/mean_group_std": 0.14491428261953823,
"aux_brier/mean_r": 0.7301434048541713,
"aux_brier/n_active_tok": 306.625,
"aux_brier/n_groups": 15.0625,
"aux_brier/n_step_records": 76.65625,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.4630952380952381,
"calib/avg_num_step_conf": 10.3203125,
"calib/ece": 0.3020080321285141,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.321285140562249,
"calib/gap": -0.020974206349206215,
"calib/mean_conf": 0.7371084337349397,
"calib/mu_c": 0.7282638888888889,
"calib/mu_w": 0.7492380952380951,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2304016064257029,
"calib/std_conf": 0.24145407964968105,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2744.0,
"completions/max_terminated_length": 2744.0,
"completions/mean_length": 628.375,
"completions/mean_terminated_length": 633.3228149414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 236.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.06704939156770706,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0287,
"num_tokens": 38250124.0,
"reward": 1.2075164318084717,
"reward_std": 0.31614387035369873,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.6425660252571106,
"rewards/format_reward_step": 0.96875,
"step": 166
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.5604936076663023e-07,
"aux_brier/mean_group_std": 0.11707872614701348,
"aux_brier/mean_r": 0.7300927916334877,
"aux_brier/n_active_tok": 304.0,
"aux_brier/n_groups": 16.125,
"aux_brier/n_step_records": 76.0,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5730337078651686,
"calib/avg_num_step_conf": 9.83984375,
"calib/ece": 0.20295275590551187,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.4015748031496063,
"calib/gap": 0.046166155941436804,
"calib/mean_conf": 0.7659448818897638,
"calib/mu_c": 0.7821212121212122,
"calib/mu_w": 0.7359550561797754,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.15964566929133864,
"calib/std_conf": 0.2396677755379001,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2608.0,
"completions/max_terminated_length": 2608.0,
"completions/mean_length": 578.671875,
"completions/mean_terminated_length": 580.9412231445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 252.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.1026303693652153,
"learning_rate": 9.166666666666666e-07,
"loss": -0.0013,
"num_tokens": 38503872.0,
"reward": 1.319819688796997,
"reward_std": 0.34465086460113525,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/final_brier_reward_step": 0.7167792916297913,
"rewards/format_reward_step": 0.9921875,
"step": 167
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 9.94002819172668e-07,
"aux_brier/mean_group_std": 0.12431108511041629,
"aux_brier/mean_r": 0.6756198881951676,
"aux_brier/n_active_tok": 330.625,
"aux_brier/n_groups": 18.75,
"aux_brier/n_step_records": 82.65625,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4620420536635707,
"calib/avg_num_step_conf": 10.984375,
"calib/ece": 0.30474199999999996,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.464,
"calib/gap": 0.021328173374613124,
"calib/mean_conf": 0.7931419999999999,
"calib/mu_c": 0.8028676470588235,
"calib/mu_w": 0.7815394736842104,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.276942,
"calib/std_conf": 0.24036952559756822,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2910.0,
"completions/max_terminated_length": 2910.0,
"completions/mean_length": 629.79296875,
"completions/mean_terminated_length": 639.7897338867188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.1792,
"grad_norm": 0.1148962751030922,
"learning_rate": 8.88888888888889e-07,
"loss": -0.0001,
"num_tokens": 38769771.0,
"reward": 1.1782002449035645,
"reward_std": 0.40775153040885925,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/final_brier_reward_step": 0.6268634796142578,
"rewards/format_reward_step": 0.97265625,
"step": 168
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -6.828714768092325e-07,
"aux_brier/mean_group_std": 0.11830183188373451,
"aux_brier/mean_r": 0.7243447293174061,
"aux_brier/n_active_tok": 310.5,
"aux_brier/n_groups": 16.5,
"aux_brier/n_step_records": 77.625,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5558035714285715,
"calib/avg_num_step_conf": 9.76171875,
"calib/ece": 0.25236220472440957,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.3779527559055118,
"calib/gap": 0.03988556338028182,
"calib/mean_conf": 0.7741732283464566,
"calib/mu_c": 0.7917605633802818,
"calib/mu_w": 0.751875,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2337401574803151,
"calib/std_conf": 0.2037075290545609,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1996.0,
"completions/max_terminated_length": 1996.0,
"completions/mean_length": 572.15234375,
"completions/mean_terminated_length": 574.3961181640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.33049607276916504,
"learning_rate": 8.611111111111112e-07,
"loss": 0.0462,
"num_tokens": 39020426.0,
"reward": 1.220787525177002,
"reward_std": 0.2584887146949768,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.6800249814987183,
"rewards/format_reward_step": 0.9921875,
"step": 169
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.123947742850186e-07,
"aux_brier/mean_group_std": 0.11455481599942005,
"aux_brier/mean_r": 0.6839772577664597,
"aux_brier/n_active_tok": 307.375,
"aux_brier/n_groups": 14.09375,
"aux_brier/n_step_records": 76.84375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5454807692307692,
"calib/avg_num_step_conf": 9.8359375,
"calib/ece": 0.24874015748031492,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.4921259842519685,
"calib/gap": 0.05461538461538462,
"calib/mean_conf": 0.8076377952755907,
"calib/mu_c": 0.83,
"calib/mu_w": 0.7753846153846153,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23291338582677162,
"calib/std_conf": 0.2130799882817784,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2209.0,
"completions/max_terminated_length": 2209.0,
"completions/mean_length": 594.58203125,
"completions/mean_terminated_length": 596.9137573242188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 208.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.06769291311502457,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0137,
"num_tokens": 39276791.0,
"reward": 1.2513952255249023,
"reward_std": 0.30168139934539795,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.685268759727478,
"rewards/format_reward_step": 0.98828125,
"step": 170
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.0209773233674797e-07,
"aux_brier/mean_group_std": 0.1321099699793983,
"aux_brier/mean_r": 0.7141771864276769,
"aux_brier/n_active_tok": 298.375,
"aux_brier/n_groups": 14.34375,
"aux_brier/n_step_records": 74.59375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5144159226190477,
"calib/avg_num_step_conf": 9.51171875,
"calib/ece": 0.31582677165354334,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.35826771653543305,
"calib/gap": 0.008085317460317376,
"calib/mean_conf": 0.7766141732283466,
"calib/mu_c": 0.7806249999999999,
"calib/mu_w": 0.7725396825396825,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.29425196850393703,
"calib/std_conf": 0.20587329056393774,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2359.0,
"completions/max_terminated_length": 2359.0,
"completions/mean_length": 561.015625,
"completions/mean_terminated_length": 561.015625,
"completions/min_length": 163.0,
"completions/min_terminated_length": 163.0,
"epoch": 0.1824,
"grad_norm": 0.04003489390015602,
"learning_rate": 8.055555555555557e-07,
"loss": 0.0353,
"num_tokens": 39527307.0,
"reward": 1.154179334640503,
"reward_std": 0.30687057971954346,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.6323422193527222,
"rewards/format_reward_step": 0.9921875,
"step": 171
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -5.545451762442588e-07,
"aux_brier/mean_group_std": 0.10414918437127466,
"aux_brier/mean_r": 0.712658002624457,
"aux_brier/n_active_tok": 295.0,
"aux_brier/n_groups": 14.03125,
"aux_brier/n_step_records": 73.75,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.530218632502437,
"calib/avg_num_step_conf": 9.859375,
"calib/ece": 0.22098814229249014,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.41106719367588934,
"calib/gap": 0.014676925219328618,
"calib/mean_conf": 0.8118972332015809,
"calib/mu_c": 0.8168862275449102,
"calib/mu_w": 0.8022093023255816,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18640316205533602,
"calib/std_conf": 0.18762810462326462,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1814.0,
"completions/max_terminated_length": 1814.0,
"completions/mean_length": 540.671875,
"completions/mean_terminated_length": 547.0830078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 173.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.09831096976995468,
"learning_rate": 7.777777777777779e-07,
"loss": -0.0324,
"num_tokens": 39769071.0,
"reward": 1.3253530263900757,
"reward_std": 0.3200302720069885,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.7154746055603027,
"rewards/format_reward_step": 0.98828125,
"step": 172
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.227553016602272e-07,
"aux_brier/mean_group_std": 0.12887376354818048,
"aux_brier/mean_r": 0.6881525563024199,
"aux_brier/n_active_tok": 323.375,
"aux_brier/n_groups": 17.125,
"aux_brier/n_step_records": 80.84375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5216328924219716,
"calib/avg_num_step_conf": 10.828125,
"calib/ece": 0.2659126984126984,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.4365079365079365,
"calib/gap": 0.0339981755391936,
"calib/mean_conf": 0.7893253968253968,
"calib/mu_c": 0.8032214765100671,
"calib/mu_w": 0.7692233009708735,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23198412698412693,
"calib/std_conf": 0.23270310566631314,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2009.0,
"completions/max_terminated_length": 2009.0,
"completions/mean_length": 627.79296875,
"completions/mean_terminated_length": 630.2549438476562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.17116612195968628,
"learning_rate": 7.5e-07,
"loss": 0.0181,
"num_tokens": 40032946.0,
"reward": 1.2438373565673828,
"reward_std": 0.37067359685897827,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.6706616878509521,
"rewards/format_reward_step": 0.98046875,
"step": 173
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 5.549296602791465e-08,
"aux_brier/mean_group_std": 0.13978389570912286,
"aux_brier/mean_r": 0.727931955577553,
"aux_brier/n_active_tok": 323.875,
"aux_brier/n_groups": 18.84375,
"aux_brier/n_step_records": 80.96875,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.6030387818844352,
"calib/avg_num_step_conf": 10.53515625,
"calib/ece": 0.35875502008032134,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.37751004016064255,
"calib/gap": 0.06310970848516395,
"calib/mean_conf": 0.7878313253012049,
"calib/mu_c": 0.8223008849557522,
"calib/mu_w": 0.7591911764705882,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3463855421686748,
"calib/std_conf": 0.20829802671746114,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2830.0,
"completions/max_terminated_length": 2830.0,
"completions/mean_length": 668.83203125,
"completions/mean_terminated_length": 674.0984497070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 215.0,
"epoch": 0.1856,
"grad_norm": 0.26240843534469604,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0223,
"num_tokens": 40308399.0,
"reward": 1.0805542469024658,
"reward_std": 0.3596838414669037,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/final_brier_reward_step": 0.611279308795929,
"rewards/format_reward_step": 0.97265625,
"step": 174
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.8007512625771724e-07,
"aux_brier/mean_group_std": 0.1390635787546291,
"aux_brier/mean_r": 0.7354133986520142,
"aux_brier/n_active_tok": 302.875,
"aux_brier/n_groups": 15.28125,
"aux_brier/n_step_records": 75.71875,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5173918624053395,
"calib/avg_num_step_conf": 9.78125,
"calib/ece": 0.3390909090909091,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.28063241106719367,
"calib/gap": 0.04196187909125926,
"calib/mean_conf": 0.7253359683794466,
"calib/mu_c": 0.7497169811320756,
"calib/mu_w": 0.7077551020408164,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.32272727272727275,
"calib/std_conf": 0.25181472926784115,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2881.0,
"completions/max_terminated_length": 2881.0,
"completions/mean_length": 596.90234375,
"completions/mean_terminated_length": 596.90234375,
"completions/min_length": 179.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.18161000311374664,
"learning_rate": 6.944444444444446e-07,
"loss": 0.0239,
"num_tokens": 40567030.0,
"reward": 1.06131911277771,
"reward_std": 0.3406786620616913,
"rewards/accuracy_reward_step": 0.4140625,
"rewards/final_brier_reward_step": 0.612463653087616,
"rewards/format_reward_step": 0.98828125,
"step": 175
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.1970234808109677e-07,
"aux_brier/mean_group_std": 0.13920629487457764,
"aux_brier/mean_r": 0.7091455783349397,
"aux_brier/n_active_tok": 320.0,
"aux_brier/n_groups": 17.875,
"aux_brier/n_step_records": 80.0,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5805806451612903,
"calib/avg_num_step_conf": 10.58203125,
"calib/ece": 0.32654618473895586,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.39357429718875503,
"calib/gap": 0.04724322580645157,
"calib/mean_conf": 0.8083132530120483,
"calib/mu_c": 0.8318399999999999,
"calib/mu_w": 0.7845967741935483,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.31642570281124505,
"calib/std_conf": 0.20189157261617924,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 3014.0,
"completions/max_terminated_length": 3014.0,
"completions/mean_length": 601.2890625,
"completions/mean_terminated_length": 606.0236206054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.3984068036079407,
"learning_rate": 6.666666666666667e-07,
"loss": -0.0087,
"num_tokens": 40825024.0,
"reward": 1.1270815134048462,
"reward_std": 0.3631778657436371,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.6177011728286743,
"rewards/format_reward_step": 0.96875,
"step": 176
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.4940496025648304e-07,
"aux_brier/mean_group_std": 0.12208929355906557,
"aux_brier/mean_r": 0.7155938312015591,
"aux_brier/n_active_tok": 313.5,
"aux_brier/n_groups": 16.5625,
"aux_brier/n_step_records": 78.375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5093722201042065,
"calib/avg_num_step_conf": 10.19921875,
"calib/ece": 0.28416733067729083,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.30677290836653387,
"calib/gap": 0.034957809124412376,
"calib/mean_conf": 0.7414581673306773,
"calib/mu_c": 0.7584496124031008,
"calib/mu_w": 0.7234918032786885,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.25584063745019925,
"calib/std_conf": 0.25027556375279786,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2354.0,
"completions/max_terminated_length": 2354.0,
"completions/mean_length": 574.703125,
"completions/mean_terminated_length": 581.517822265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 0.1888,
"grad_norm": 0.08498404920101166,
"learning_rate": 6.388888888888889e-07,
"loss": -0.0273,
"num_tokens": 41075980.0,
"reward": 1.1542657613754272,
"reward_std": 0.3269786536693573,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.6405003070831299,
"rewards/format_reward_step": 0.98046875,
"step": 177
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.998680712131609e-07,
"aux_brier/mean_group_std": 0.11104178135653618,
"aux_brier/mean_r": 0.7157304284767535,
"aux_brier/n_active_tok": 295.875,
"aux_brier/n_groups": 14.1875,
"aux_brier/n_step_records": 73.96875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5460354720918101,
"calib/avg_num_step_conf": 9.51953125,
"calib/ece": 0.2464000000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.344,
"calib/gap": 0.05515779864371417,
"calib/mean_conf": 0.7792,
"calib/mu_c": 0.8030281690140845,
"calib/mu_w": 0.7478703703703703,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2288000000000001,
"calib/std_conf": 0.2233494123565137,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2644.0,
"completions/max_terminated_length": 2644.0,
"completions/mean_length": 544.80078125,
"completions/mean_terminated_length": 546.9373168945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.1378621906042099,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0093,
"num_tokens": 41321521.0,
"reward": 1.2118072509765625,
"reward_std": 0.37717777490615845,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.6675414443016052,
"rewards/format_reward_step": 0.97265625,
"step": 178
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 2.420083540788376e-07,
"aux_brier/mean_group_std": 0.11303950486766368,
"aux_brier/mean_r": 0.7173948105286239,
"aux_brier/n_active_tok": 301.25,
"aux_brier/n_groups": 15.25,
"aux_brier/n_step_records": 75.3125,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5191171477079796,
"calib/avg_num_step_conf": 10.10546875,
"calib/ece": 0.21400000000000002,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.36,
"calib/gap": 0.029881154499151186,
"calib/mean_conf": 0.792,
"calib/mu_c": 0.8033548387096775,
"calib/mu_w": 0.7734736842105263,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.19300000000000003,
"calib/std_conf": 0.19980390386576533,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2502.0,
"completions/max_terminated_length": 2502.0,
"completions/mean_length": 581.30078125,
"completions/mean_terminated_length": 588.1937255859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 241.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.22822193801403046,
"learning_rate": 5.833333333333334e-07,
"loss": 0.0258,
"num_tokens": 41576598.0,
"reward": 1.2668393850326538,
"reward_std": 0.36677446961402893,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.6923577785491943,
"rewards/format_reward_step": 0.9765625,
"step": 179
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.296333832587429e-07,
"aux_brier/mean_group_std": 0.1467650592734881,
"aux_brier/mean_r": 0.6713683648774496,
"aux_brier/n_active_tok": 350.0,
"aux_brier/n_groups": 17.0625,
"aux_brier/n_step_records": 87.5,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5514319888364675,
"calib/avg_num_step_conf": 11.74609375,
"calib/ece": 0.295242,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.556,
"calib/gap": 0.05286965911356256,
"calib/mean_conf": 0.848842,
"calib/mu_c": 0.8702013422818794,
"calib/mu_w": 0.8173316831683168,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.274042,
"calib/std_conf": 0.19888212598421207,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2046.0,
"completions/max_terminated_length": 2046.0,
"completions/mean_length": 661.2421875,
"completions/mean_terminated_length": 669.0830078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 228.0,
"epoch": 0.192,
"grad_norm": 0.22756622731685638,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0056,
"num_tokens": 41849732.0,
"reward": 1.234571099281311,
"reward_std": 0.35712069272994995,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.6648468375205994,
"rewards/format_reward_step": 0.97265625,
"step": 180
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -5.952673761133731e-07,
"aux_brier/mean_group_std": 0.12988499873953285,
"aux_brier/mean_r": 0.7105410062150648,
"aux_brier/n_active_tok": 314.75,
"aux_brier/n_groups": 16.65625,
"aux_brier/n_step_records": 78.6875,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5145697522816167,
"calib/avg_num_step_conf": 10.21484375,
"calib/ece": 0.34563306451612896,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.4475806451612903,
"calib/gap": 0.019005867014341704,
"calib/mean_conf": 0.7973508064516129,
"calib/mu_c": 0.8073135593220341,
"calib/mu_w": 0.7883076923076924,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3335887096774193,
"calib/std_conf": 0.21528795869866935,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2648.0,
"completions/max_terminated_length": 2648.0,
"completions/mean_length": 592.8203125,
"completions/mean_terminated_length": 595.1451416015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.20192381739616394,
"learning_rate": 5.277777777777779e-07,
"loss": 0.0639,
"num_tokens": 42107758.0,
"reward": 1.0910775661468506,
"reward_std": 0.3487577736377716,
"rewards/accuracy_reward_step": 0.4609375,
"rewards/final_brier_reward_step": 0.5908727645874023,
"rewards/format_reward_step": 0.96484375,
"step": 181
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 4.255648992923522e-07,
"aux_brier/mean_group_std": 0.11135627675651893,
"aux_brier/mean_r": 0.6907847942857936,
"aux_brier/n_active_tok": 327.0,
"aux_brier/n_groups": 16.5,
"aux_brier/n_step_records": 81.75,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5520652898067956,
"calib/avg_num_step_conf": 10.828125,
"calib/ece": 0.23513833992094874,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.43478260869565216,
"calib/gap": 0.054120586275816396,
"calib/mean_conf": 0.8169565217391305,
"calib/mu_c": 0.8372784810126582,
"calib/mu_w": 0.7831578947368418,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2137944664031622,
"calib/std_conf": 0.19068792170752427,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2737.0,
"completions/max_terminated_length": 2737.0,
"completions/mean_length": 597.72265625,
"completions/mean_terminated_length": 600.0667114257812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.02944478951394558,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0262,
"num_tokens": 42366935.0,
"reward": 1.2856700420379639,
"reward_std": 0.2591235935688019,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.7051804661750793,
"rewards/format_reward_step": 0.984375,
"step": 182
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.6183354104720715e-07,
"aux_brier/mean_group_std": 0.11808640196865099,
"aux_brier/mean_r": 0.7135023719089151,
"aux_brier/n_active_tok": 314.875,
"aux_brier/n_groups": 15.65625,
"aux_brier/n_step_records": 78.71875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4999037783052152,
"calib/avg_num_step_conf": 10.7265625,
"calib/ece": 0.32677800000000007,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.4,
"calib/gap": -0.0005118673423568643,
"calib/mean_conf": 0.790778,
"calib/mu_c": 0.7905343511450382,
"calib/mu_w": 0.7910462184873951,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.29677800000000004,
"calib/std_conf": 0.21987768353336815,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2706.0,
"completions/max_terminated_length": 2706.0,
"completions/mean_length": 634.01953125,
"completions/mean_terminated_length": 639.0117797851562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 141.0,
"epoch": 0.1952,
"grad_norm": 0.25354498624801636,
"learning_rate": 4.7222222222222226e-07,
"loss": 0.013,
"num_tokens": 42635924.0,
"reward": 1.1540048122406006,
"reward_std": 0.4529300630092621,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.6160194873809814,
"rewards/format_reward_step": 0.9765625,
"step": 183
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.2697582330598607e-07,
"aux_brier/mean_group_std": 0.14336063705529795,
"aux_brier/mean_r": 0.7043994428919576,
"aux_brier/n_active_tok": 321.375,
"aux_brier/n_groups": 15.09375,
"aux_brier/n_step_records": 80.34375,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.44517338331771317,
"calib/avg_num_step_conf": 10.6328125,
"calib/ece": 0.26717131474103595,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.44223107569721115,
"calib/gap": -0.023902798232695033,
"calib/mean_conf": 0.807808764940239,
"calib/mu_c": 0.7985714285714287,
"calib/mu_w": 0.8224742268041237,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23071713147410372,
"calib/std_conf": 0.20463060510268194,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2129.0,
"completions/max_terminated_length": 2129.0,
"completions/mean_length": 592.03515625,
"completions/mean_terminated_length": 599.0553588867188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 242.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.14763610064983368,
"learning_rate": 4.444444444444445e-07,
"loss": -0.0166,
"num_tokens": 42892765.0,
"reward": 1.256502389907837,
"reward_std": 0.35554298758506775,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.6588218212127686,
"rewards/format_reward_step": 0.98046875,
"step": 184
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 6.748861667849049e-07,
"aux_brier/mean_group_std": 0.1525347031811119,
"aux_brier/mean_r": 0.7060887684059763,
"aux_brier/n_active_tok": 333.25,
"aux_brier/n_groups": 19.625,
"aux_brier/n_step_records": 83.3125,
"calib/answer_extract_rate": 0.95703125,
"calib/auroc": 0.6198409433703551,
"calib/avg_num_step_conf": 11.21484375,
"calib/ece": 0.22514285714285714,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.4,
"calib/gap": 0.09866790072672404,
"calib/mean_conf": 0.7900408163265307,
"calib/mu_c": 0.831118881118881,
"calib/mu_w": 0.732450980392157,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.21575510204081633,
"calib/std_conf": 0.22385764054860102,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2785.0,
"completions/max_terminated_length": 2785.0,
"completions/mean_length": 601.6015625,
"completions/mean_terminated_length": 611.1508178710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 291.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.09341681748628616,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0095,
"num_tokens": 43153695.0,
"reward": 1.205216884613037,
"reward_std": 0.27410241961479187,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.6802429556846619,
"rewards/format_reward_step": 0.953125,
"step": 185
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.3564172646196582e-07,
"aux_brier/mean_group_std": 0.12399026065331648,
"aux_brier/mean_r": 0.705690339450971,
"aux_brier/n_active_tok": 325.125,
"aux_brier/n_groups": 16.125,
"aux_brier/n_step_records": 81.28125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5575894295842733,
"calib/avg_num_step_conf": 10.6328125,
"calib/ece": 0.2943650793650794,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.3611111111111111,
"calib/gap": 0.02039187882694171,
"calib/mean_conf": 0.7902380952380952,
"calib/mu_c": 0.798896551724138,
"calib/mu_w": 0.7785046728971963,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2546031746031746,
"calib/std_conf": 0.23210378181511784,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2262.0,
"completions/max_terminated_length": 2262.0,
"completions/mean_length": 603.35546875,
"completions/mean_terminated_length": 605.7216186523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.1984,
"grad_norm": 0.2975756525993347,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.0206,
"num_tokens": 43413194.0,
"reward": 1.2223985195159912,
"reward_std": 0.31535643339157104,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.655219554901123,
"rewards/format_reward_step": 0.984375,
"step": 186
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -8.55057671056425e-08,
"aux_brier/mean_group_std": 0.11910087325314367,
"aux_brier/mean_r": 0.6735562742804757,
"aux_brier/n_active_tok": 347.625,
"aux_brier/n_groups": 21.15625,
"aux_brier/n_step_records": 86.90625,
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.49827265479670474,
"calib/avg_num_step_conf": 12.45703125,
"calib/ece": 0.29754032258064517,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.4435483870967742,
"calib/gap": 0.012326601116130709,
"calib/mean_conf": 0.811491935483871,
"calib/mu_c": 0.8167605633802818,
"calib/mu_w": 0.8044339622641511,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26822580645161287,
"calib/std_conf": 0.22131159598089012,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3011.0,
"completions/max_terminated_length": 3011.0,
"completions/mean_length": 657.10546875,
"completions/mean_terminated_length": 664.8972778320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.6251574158668518,
"learning_rate": 3.611111111111111e-07,
"loss": 0.0182,
"num_tokens": 43682957.0,
"reward": 1.1957060098648071,
"reward_std": 0.38417935371398926,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.6343867182731628,
"rewards/format_reward_step": 0.96484375,
"step": 187
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -1.0944355566949726e-07,
"aux_brier/mean_group_std": 0.12051941412352694,
"aux_brier/mean_r": 0.694630832225542,
"aux_brier/n_active_tok": 315.75,
"aux_brier/n_groups": 14.65625,
"aux_brier/n_step_records": 78.9375,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5367207792207792,
"calib/avg_num_step_conf": 10.51953125,
"calib/ece": 0.2666535433070867,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.4566929133858268,
"calib/gap": 0.03677532467532485,
"calib/mean_conf": 0.7801968503937008,
"calib/mu_c": 0.7946753246753246,
"calib/mu_w": 0.7578999999999998,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.22027559055118115,
"calib/std_conf": 0.24276856349855278,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2134.0,
"completions/max_terminated_length": 2134.0,
"completions/mean_length": 623.03125,
"completions/mean_terminated_length": 625.4745483398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.048046354204416275,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0027,
"num_tokens": 43946525.0,
"reward": 1.2687289714813232,
"reward_std": 0.28813987970352173,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.6842910051345825,
"rewards/format_reward_step": 0.9921875,
"step": 188
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -7.903525094665742e-07,
"aux_brier/mean_group_std": 0.13133251916872882,
"aux_brier/mean_r": 0.6917744470619167,
"aux_brier/n_active_tok": 318.0,
"aux_brier/n_groups": 15.9375,
"aux_brier/n_step_records": 79.5,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4858169682967527,
"calib/avg_num_step_conf": 10.24609375,
"calib/ece": 0.28181818181818186,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.4308300395256917,
"calib/gap": 0.0025863175458863408,
"calib/mean_conf": 0.8273517786561264,
"calib/mu_c": 0.82843537414966,
"calib/mu_w": 0.8258490566037736,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26407114624505934,
"calib/std_conf": 0.18891332361882235,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1537.0,
"completions/max_terminated_length": 1537.0,
"completions/mean_length": 579.78515625,
"completions/mean_terminated_length": 582.058837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.2016,
"grad_norm": 0.08419397473335266,
"learning_rate": 3.055555555555556e-07,
"loss": 0.0249,
"num_tokens": 44202718.0,
"reward": 1.2317867279052734,
"reward_std": 0.2693943381309509,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/final_brier_reward_step": 0.653709352016449,
"rewards/format_reward_step": 0.98828125,
"step": 189
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.261895272580693e-07,
"aux_brier/mean_group_std": 0.1180641851584547,
"aux_brier/mean_r": 0.6871991138607908,
"aux_brier/n_active_tok": 318.125,
"aux_brier/n_groups": 15.40625,
"aux_brier/n_step_records": 79.53125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.6120611770779556,
"calib/avg_num_step_conf": 10.78515625,
"calib/ece": 0.2400790513833994,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.42292490118577075,
"calib/gap": 0.08553497676819855,
"calib/mean_conf": 0.7960474308300395,
"calib/mu_c": 0.8312080536912753,
"calib/mu_w": 0.7456730769230767,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.22359683794466423,
"calib/std_conf": 0.22699138454885742,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2934.0,
"completions/max_terminated_length": 2934.0,
"completions/mean_length": 634.890625,
"completions/mean_terminated_length": 634.890625,
"completions/min_length": 216.0,
"completions/min_terminated_length": 216.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.03616810962557793,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0193,
"num_tokens": 44470858.0,
"reward": 1.2515528202056885,
"reward_std": 0.2907295227050781,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.6937117576599121,
"rewards/format_reward_step": 0.984375,
"step": 190
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 3.8819080244723025e-07,
"aux_brier/mean_group_std": 0.12084470755713787,
"aux_brier/mean_r": 0.6825326358416054,
"aux_brier/n_active_tok": 334.875,
"aux_brier/n_groups": 16.09375,
"aux_brier/n_step_records": 83.71875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.601985733564681,
"calib/avg_num_step_conf": 11.265625,
"calib/ece": 0.38772,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.46,
"calib/gap": 0.0589582931688194,
"calib/mean_conf": 0.8105999999999999,
"calib/mu_c": 0.841965811965812,
"calib/mu_w": 0.7830075187969926,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.36516000000000004,
"calib/std_conf": 0.22669636080007988,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2798.0,
"completions/max_terminated_length": 2798.0,
"completions/mean_length": 570.5703125,
"completions/mean_terminated_length": 581.936279296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.05756014212965965,
"learning_rate": 2.5000000000000004e-07,
"loss": -0.0469,
"num_tokens": 44721092.0,
"reward": 1.094632863998413,
"reward_std": 0.2197417914867401,
"rewards/accuracy_reward_step": 0.45703125,
"rewards/final_brier_reward_step": 0.5972816348075867,
"rewards/format_reward_step": 0.9765625,
"step": 191
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -4.017105327686066e-07,
"aux_brier/mean_group_std": 0.13229735907475035,
"aux_brier/mean_r": 0.7189988864627267,
"aux_brier/n_active_tok": 315.125,
"aux_brier/n_groups": 17.78125,
"aux_brier/n_step_records": 78.78125,
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5310182733050848,
"calib/avg_num_step_conf": 10.8203125,
"calib/ece": 0.3099934959349594,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.37398373983739835,
"calib/gap": 0.033709533898305,
"calib/mean_conf": 0.7682178861788618,
"calib/mu_c": 0.7843874999999999,
"calib/mu_w": 0.7506779661016949,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.27894308943089435,
"calib/std_conf": 0.23591018410538667,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2129.0,
"completions/max_terminated_length": 2129.0,
"completions/mean_length": 620.95703125,
"completions/mean_terminated_length": 630.8135375976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.2048,
"grad_norm": 0.16255465149879456,
"learning_rate": 2.2222222222222224e-07,
"loss": -0.0091,
"num_tokens": 44985033.0,
"reward": 1.1405599117279053,
"reward_std": 0.36320745944976807,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.6247397065162659,
"rewards/format_reward_step": 0.9609375,
"step": 192
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -7.73468718795467e-08,
"aux_brier/mean_group_std": 0.12834107534476466,
"aux_brier/mean_r": 0.6905276728515516,
"aux_brier/n_active_tok": 341.875,
"aux_brier/n_groups": 20.4375,
"aux_brier/n_step_records": 85.46875,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.47207613323315806,
"calib/avg_num_step_conf": 11.35546875,
"calib/ece": 0.30897233201581037,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.41106719367588934,
"calib/gap": 0.010392561983471271,
"calib/mean_conf": 0.7943478260869566,
"calib/mu_c": 0.7993181818181819,
"calib/mu_w": 0.7889256198347107,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2907905138339922,
"calib/std_conf": 0.21251196098682956,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2298.0,
"completions/max_terminated_length": 2298.0,
"completions/mean_length": 603.78515625,
"completions/mean_terminated_length": 608.5393676757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 232.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.061190713196992874,
"learning_rate": 1.9444444444444447e-07,
"loss": -0.0169,
"num_tokens": 45245314.0,
"reward": 1.166947364807129,
"reward_std": 0.4310789704322815,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.6287269592285156,
"rewards/format_reward_step": 0.98828125,
"step": 193
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.467537613653036e-08,
"aux_brier/mean_group_std": 0.10921226174267654,
"aux_brier/mean_r": 0.7200921041378893,
"aux_brier/n_active_tok": 300.0,
"aux_brier/n_groups": 14.84375,
"aux_brier/n_step_records": 75.0,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5728968405024742,
"calib/avg_num_step_conf": 9.484375,
"calib/ece": 0.2767193675889329,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.45849802371541504,
"calib/gap": 0.061031594975256676,
"calib/mean_conf": 0.8053359683794467,
"calib/mu_c": 0.8321126760563381,
"calib/mu_w": 0.7710810810810814,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26039525691699617,
"calib/std_conf": 0.2147219586728735,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1979.0,
"completions/max_terminated_length": 1979.0,
"completions/mean_length": 543.35546875,
"completions/mean_terminated_length": 543.35546875,
"completions/min_length": 196.0,
"completions/min_terminated_length": 196.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.032329004257917404,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.022,
"num_tokens": 45490357.0,
"reward": 1.2202813625335693,
"reward_std": 0.3027583658695221,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.6701878905296326,
"rewards/format_reward_step": 0.98828125,
"step": 194
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.4904705261664652e-07,
"aux_brier/mean_group_std": 0.1341669397120112,
"aux_brier/mean_r": 0.7263006015498974,
"aux_brier/n_active_tok": 307.0,
"aux_brier/n_groups": 15.25,
"aux_brier/n_step_records": 76.75,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.539510151466323,
"calib/avg_num_step_conf": 10.0390625,
"calib/ece": 0.2530158730158731,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.38095238095238093,
"calib/gap": 0.043546245568804265,
"calib/mean_conf": 0.7946825396825397,
"calib/mu_c": 0.8131724137931035,
"calib/mu_w": 0.7696261682242992,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.23615079365079372,
"calib/std_conf": 0.20581886977499117,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2747.0,
"completions/max_terminated_length": 2747.0,
"completions/mean_length": 564.10546875,
"completions/mean_terminated_length": 570.7944946289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.208,
"grad_norm": 0.07772547006607056,
"learning_rate": 1.3888888888888888e-07,
"loss": -0.0248,
"num_tokens": 45740752.0,
"reward": 1.2275408506393433,
"reward_std": 0.29382142424583435,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.6757882833480835,
"rewards/format_reward_step": 0.984375,
"step": 195
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 7.826108038705426e-08,
"aux_brier/mean_group_std": 0.10924094080593996,
"aux_brier/mean_r": 0.6989476227265496,
"aux_brier/n_active_tok": 300.125,
"aux_brier/n_groups": 12.78125,
"aux_brier/n_step_records": 75.03125,
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5354515474378487,
"calib/avg_num_step_conf": 9.59375,
"calib/ece": 0.2893307086614173,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.4921259842519685,
"calib/gap": 0.026340690005073664,
"calib/mean_conf": 0.8389370078740156,
"calib/mu_c": 0.8501369863013699,
"calib/mu_w": 0.8237962962962962,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2767322834645669,
"calib/std_conf": 0.18039123047418926,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1552.0,
"completions/max_terminated_length": 1552.0,
"completions/mean_length": 518.61328125,
"completions/mean_terminated_length": 522.6968383789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 220.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.09711069613695145,
"learning_rate": 1.1111111111111112e-07,
"loss": -0.0158,
"num_tokens": 45976061.0,
"reward": 1.231645941734314,
"reward_std": 0.18754267692565918,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.660959005355835,
"rewards/format_reward_step": 0.9921875,
"step": 196
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -7.674647184152672e-07,
"aux_brier/mean_group_std": 0.13293358674493327,
"aux_brier/mean_r": 0.7153765468896983,
"aux_brier/n_active_tok": 319.25,
"aux_brier/n_groups": 14.71875,
"aux_brier/n_step_records": 79.8125,
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4699100112485939,
"calib/avg_num_step_conf": 10.6875,
"calib/ece": 0.36051185770750993,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.4782608695652174,
"calib/gap": 0.0002670603674539418,
"calib/mean_conf": 0.8032826086956522,
"calib/mu_c": 0.8034166666666666,
"calib/mu_w": 0.8031496062992126,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3328853754940712,
"calib/std_conf": 0.22820994462988967,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1756.0,
"completions/max_terminated_length": 1756.0,
"completions/mean_length": 560.8359375,
"completions/mean_terminated_length": 567.4862060546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 273.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.05755879357457161,
"learning_rate": 8.333333333333334e-08,
"loss": -0.0315,
"num_tokens": 46224691.0,
"reward": 1.1357746124267578,
"reward_std": 0.3277100920677185,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.5977863073348999,
"rewards/format_reward_step": 0.98828125,
"step": 197
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": -3.4224928335824245e-08,
"aux_brier/mean_group_std": 0.1235597167482547,
"aux_brier/mean_r": 0.7128413021535491,
"aux_brier/n_active_tok": 307.25,
"aux_brier/n_groups": 15.3125,
"aux_brier/n_step_records": 76.8125,
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5242310674592918,
"calib/avg_num_step_conf": 9.8671875,
"calib/ece": 0.3136111111111111,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.36904761904761907,
"calib/gap": -0.0028547428276040065,
"calib/mean_conf": 0.7955158730158729,
"calib/mu_c": 0.7943150684931507,
"calib/mu_w": 0.7971698113207547,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2648809523809524,
"calib/std_conf": 0.20008165928682217,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2961.0,
"completions/max_terminated_length": 2961.0,
"completions/mean_length": 545.32421875,
"completions/mean_terminated_length": 547.4627685546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.2112,
"grad_norm": 0.09032348543405533,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0202,
"num_tokens": 46469678.0,
"reward": 1.2239996194839478,
"reward_std": 0.3071012496948242,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.653810977935791,
"rewards/format_reward_step": 0.98046875,
"step": 198
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 9.611757822991507e-08,
"aux_brier/mean_group_std": 0.13088208167619045,
"aux_brier/mean_r": 0.6919954997381719,
"aux_brier/n_active_tok": 319.75,
"aux_brier/n_groups": 15.03125,
"aux_brier/n_step_records": 79.9375,
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.42309462747418947,
"calib/avg_num_step_conf": 10.859375,
"calib/ece": 0.3160887096774196,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.4717741935483871,
"calib/gap": -0.022996646281317812,
"calib/mean_conf": 0.8327016129032258,
"calib/mu_c": 0.8224087591240876,
"calib/mu_w": 0.8454054054054054,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.29818548387096794,
"calib/std_conf": 0.18949944590261614,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2792.0,
"completions/max_terminated_length": 2792.0,
"completions/mean_length": 635.921875,
"completions/mean_terminated_length": 643.4624633789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.02755553461611271,
"learning_rate": 2.777777777777778e-08,
"loss": 0.0438,
"num_tokens": 46736674.0,
"reward": 1.1713604927062988,
"reward_std": 0.42046260833740234,
"rewards/accuracy_reward_step": 0.53515625,
"rewards/final_brier_reward_step": 0.607316792011261,
"rewards/format_reward_step": 0.96875,
"step": 199
},
{
"aux_brier/lambda": 0.050000000000000024,
"aux_brier/loss": 1.1839494104548542e-07,
"aux_brier/mean_group_std": 0.13298443257382436,
"aux_brier/mean_r": 0.6973319277544408,
"aux_brier/n_active_tok": 330.75,
"aux_brier/n_groups": 17.6875,
"aux_brier/n_step_records": 82.6875,
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5944006468566808,
"calib/avg_num_step_conf": 10.98046875,
"calib/ece": 0.2155200000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.412,
"calib/gap": 0.08189475102755861,
"calib/mean_conf": 0.7828,
"calib/mu_c": 0.8145751633986927,
"calib/mu_w": 0.7326804123711341,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1931600000000001,
"calib/std_conf": 0.23010467183436323,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2552.0,
"completions/max_terminated_length": 2552.0,
"completions/mean_length": 612.23046875,
"completions/mean_terminated_length": 619.4901733398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 145.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.0391717366874218,
"learning_rate": 0.0,
"loss": -0.0137,
"num_tokens": 47001453.0,
"reward": 1.2615517377853394,
"reward_std": 0.26289767026901245,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.702457070350647,
"rewards/format_reward_step": 0.9765625,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.02537305059027858,
"train_runtime": 17053.7031,
"train_samples_per_second": 3.002,
"train_steps_per_second": 0.012
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 47001453,
"num_train_epochs": 1,
"save_steps": 40,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}