Files
PureRL-1.5B-v6b3-bare-fmt03/trainer_state.json
ModelHub XC 7a7e0e13c9 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/PureRL-1.5B-v6b3-bare-fmt03
Source: Original Platform
2026-06-27 00:19:21 +08:00

8631 lines
336 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calib/answer_extract_rate": 0.03515625,
"calib/auroc": 0.75,
"calib/avg_num_step_conf": 0.01171875,
"calib/ece": 0.6500000000000001,
"calib/final_conf_rate": 0.01171875,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.025000000000000022,
"calib/mean_conf": 0.9833333333333334,
"calib/mu_c": 1.0,
"calib/mu_w": 0.975,
"calib/nonempty_final_conf_rate": 0.01171875,
"calib/nonempty_reasoning_rate": 0.0390625,
"calib/nonempty_step_conf_rate": 0.0078125,
"calib/pce": 0.6500000000000001,
"calib/std_conf": 0.023570226039551608,
"calib/step_conf_rate": 0.0078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 3070.0,
"completions/max_terminated_length": 3070.0,
"completions/mean_length": 695.9765625,
"completions/mean_terminated_length": 748.6134643554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.0008487591985613108,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0006,
"num_tokens": 235322.0,
"reward": 0.0074518583714962006,
"reward_std": 0.02107703872025013,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.00390625,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.0016824332997202873,
"step": 1
},
{
"calib/answer_extract_rate": 0.05078125,
"calib/avg_num_step_conf": 0.04296875,
"calib/ece": 1.0,
"calib/final_conf_rate": 0.015625,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/mean_conf": 1.0,
"calib/mu_c": NaN,
"calib/mu_w": 1.0,
"calib/nonempty_final_conf_rate": 0.015625,
"calib/nonempty_reasoning_rate": 0.0625,
"calib/nonempty_step_conf_rate": 0.01953125,
"calib/pce": 1.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.01953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2996.0,
"completions/max_terminated_length": 2996.0,
"completions/mean_length": 644.19140625,
"completions/mean_terminated_length": 717.0130004882812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.000985708087682724,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0083,
"num_tokens": 453091.0,
"reward": 0.002142443088814616,
"reward_std": 0.006059744395315647,
"rewards/accuracy_reward_step": 0.0,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.0038822719361633062,
"step": 2
},
{
"calib/answer_extract_rate": 0.05078125,
"calib/avg_num_step_conf": 0.02734375,
"calib/ece": 0.23333333333333334,
"calib/final_conf_rate": 0.01171875,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.23333333333333336,
"calib/mu_c": NaN,
"calib/mu_w": 0.23333333333333336,
"calib/nonempty_final_conf_rate": 0.01171875,
"calib/nonempty_reasoning_rate": 0.0546875,
"calib/nonempty_step_conf_rate": 0.01171875,
"calib/pce": 0.23333333333333334,
"calib/std_conf": 0.3090127649287144,
"calib/step_conf_rate": 0.01171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 2870.0,
"completions/max_terminated_length": 2870.0,
"completions/mean_length": 724.421875,
"completions/mean_terminated_length": 785.8135375976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.0,
"learning_rate": 7.5e-07,
"loss": 0.0,
"num_tokens": 693367.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_step": 0.0,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.0,
"rewards/stepwise_brier_reward": 0.0,
"step": 3
},
{
"calib/answer_extract_rate": 0.03125,
"calib/auroc": 0.625,
"calib/avg_num_step_conf": 0.078125,
"calib/ece": 0.3749750000000001,
"calib/final_conf_rate": 0.015625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": 0.24995,
"calib/mean_conf": 0.8749750000000001,
"calib/mu_c": 0.99995,
"calib/mu_w": 0.75,
"calib/nonempty_final_conf_rate": 0.015625,
"calib/nonempty_reasoning_rate": 0.04296875,
"calib/nonempty_step_conf_rate": 0.01171875,
"calib/pce": 0.3749750000000001,
"calib/std_conf": 0.2164919210386383,
"calib/step_conf_rate": 0.01171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 741.92578125,
"completions/mean_terminated_length": 815.1630859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.0011622401652857661,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0023,
"num_tokens": 939036.0,
"reward": 0.0078125,
"reward_std": 0.022097086533904076,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.0,
"rewards/stepwise_brier_reward": 0.0,
"step": 4
},
{
"calib/answer_extract_rate": 0.03515625,
"calib/auroc": 0.625,
"calib/avg_num_step_conf": 0.02734375,
"calib/ece": 0.375,
"calib/final_conf_rate": 0.015625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.5,
"calib/gap": 0.1499999999999999,
"calib/mean_conf": 0.875,
"calib/mu_c": 0.95,
"calib/mu_w": 0.8,
"calib/nonempty_final_conf_rate": 0.015625,
"calib/nonempty_reasoning_rate": 0.046875,
"calib/nonempty_step_conf_rate": 0.015625,
"calib/pce": 0.375,
"calib/std_conf": 0.16393596310755001,
"calib/step_conf_rate": 0.015625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.12890625,
"completions/max_length": 3061.0,
"completions/max_terminated_length": 3061.0,
"completions/mean_length": 702.37890625,
"completions/mean_terminated_length": 806.3184204101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.0011576212709769607,
"learning_rate": 1.25e-06,
"loss": 0.0156,
"num_tokens": 1175101.0,
"reward": 0.0078125,
"reward_std": 0.022097086533904076,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.0,
"rewards/stepwise_brier_reward": 0.0,
"step": 5
},
{
"calib/answer_extract_rate": 0.0234375,
"calib/avg_num_step_conf": 0.0703125,
"calib/ece": 0.84,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.84,
"calib/mu_c": NaN,
"calib/mu_w": 0.84,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.02734375,
"calib/nonempty_step_conf_rate": 0.01171875,
"calib/pce": 0.84,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.01171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 2966.0,
"completions/max_terminated_length": 2966.0,
"completions/mean_length": 685.578125,
"completions/mean_terminated_length": 728.2490234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0064,
"grad_norm": 0.0015471765073016286,
"learning_rate": 1.5e-06,
"loss": -0.0042,
"num_tokens": 1406129.0,
"reward": 0.002580254338681698,
"reward_std": 0.00729806162416935,
"rewards/accuracy_reward_step": 0.0,
"rewards/final_brier_reward_step": 0.0011500000255182385,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.003333517350256443,
"step": 6
},
{
"calib/answer_extract_rate": 0.0546875,
"calib/auroc": 0.8333333333333334,
"calib/avg_num_step_conf": 0.1484375,
"calib/ece": 0.34759259259259256,
"calib/final_conf_rate": 0.015625,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 0.5,
"calib/gap": 0.5365432098765432,
"calib/mean_conf": 0.5975925925925926,
"calib/mu_c": 1.0,
"calib/mu_w": 0.46345679012345675,
"calib/nonempty_final_conf_rate": 0.015625,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.03515625,
"calib/pce": 0.34759259259259256,
"calib/std_conf": 0.42104233289603427,
"calib/step_conf_rate": 0.03515625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1328125,
"completions/max_length": 3039.0,
"completions/max_terminated_length": 3039.0,
"completions/mean_length": 725.26953125,
"completions/mean_terminated_length": 836.3468627929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.0017169080674648285,
"learning_rate": 1.75e-06,
"loss": 0.0288,
"num_tokens": 1648790.0,
"reward": 0.015072671696543694,
"reward_std": 0.035592082887887955,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.003904687473550439,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.0009188127587549388,
"step": 7
},
{
"calib/answer_extract_rate": 0.015625,
"calib/avg_num_step_conf": 0.05859375,
"calib/ece": 0.09999999999999998,
"calib/final_conf_rate": 0.00390625,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.9,
"calib/mu_c": 0.9,
"calib/mu_w": NaN,
"calib/nonempty_final_conf_rate": 0.00390625,
"calib/nonempty_reasoning_rate": 0.0234375,
"calib/nonempty_step_conf_rate": 0.015625,
"calib/pce": 0.0,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.015625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.11328125,
"completions/max_length": 2942.0,
"completions/max_terminated_length": 2942.0,
"completions/mean_length": 706.6875,
"completions/mean_terminated_length": 796.9691162109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.0014117079554125667,
"learning_rate": 2.0000000000000003e-06,
"loss": -0.0038,
"num_tokens": 1885782.0,
"reward": 0.00773979164659977,
"reward_std": 0.021891437470912933,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.003867187537252903,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.002912291558459401,
"step": 8
},
{
"calib/answer_extract_rate": 0.046875,
"calib/avg_num_step_conf": 0.0,
"calib/ece": 0.95,
"calib/final_conf_rate": 0.0078125,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.5,
"calib/mean_conf": 0.95,
"calib/mu_c": NaN,
"calib/mu_w": 0.95,
"calib/nonempty_final_conf_rate": 0.0078125,
"calib/nonempty_reasoning_rate": 0.046875,
"calib/nonempty_step_conf_rate": 0.0,
"calib/pce": 0.95,
"calib/std_conf": 0.04999999999999999,
"calib/step_conf_rate": 0.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 3045.0,
"completions/max_terminated_length": 3045.0,
"completions/mean_length": 658.0390625,
"completions/mean_terminated_length": 726.112060546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0096,
"grad_norm": 0.0,
"learning_rate": 2.25e-06,
"loss": 0.0,
"num_tokens": 2111344.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/accuracy_reward_step": 0.0,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.0,
"rewards/stepwise_brier_reward": 0.0,
"step": 9
},
{
"calib/answer_extract_rate": 0.03515625,
"calib/auroc": 0.75,
"calib/avg_num_step_conf": 0.03125,
"calib/ece": 0.3436666666666667,
"calib/final_conf_rate": 0.01171875,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": 0.48450000000000004,
"calib/mean_conf": 0.677,
"calib/mu_c": 1.0,
"calib/mu_w": 0.5155,
"calib/nonempty_final_conf_rate": 0.01171875,
"calib/nonempty_reasoning_rate": 0.0390625,
"calib/nonempty_step_conf_rate": 0.01171875,
"calib/pce": 0.3436666666666667,
"calib/std_conf": 0.4567909806465097,
"calib/step_conf_rate": 0.01171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2917.0,
"completions/max_terminated_length": 2917.0,
"completions/mean_length": 707.08203125,
"completions/mean_terminated_length": 776.8798217773438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 8.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.0006159533513709903,
"learning_rate": 2.5e-06,
"loss": 0.0054,
"num_tokens": 2348725.0,
"reward": 0.00390625,
"reward_std": 0.011048543266952038,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.0,
"rewards/stepwise_brier_reward": 0.0,
"step": 10
},
{
"calib/answer_extract_rate": 0.04296875,
"calib/avg_num_step_conf": 0.0078125,
"calib/ece": 0.515,
"calib/final_conf_rate": 0.0078125,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/mean_conf": 0.485,
"calib/mu_c": 0.485,
"calib/mu_w": NaN,
"calib/nonempty_final_conf_rate": 0.0078125,
"calib/nonempty_reasoning_rate": 0.04296875,
"calib/nonempty_step_conf_rate": 0.00390625,
"calib/pce": 0.0,
"calib/std_conf": 0.385,
"calib/step_conf_rate": 0.00390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2792.0,
"completions/max_terminated_length": 2792.0,
"completions/mean_length": 680.48828125,
"completions/mean_terminated_length": 757.4130249023438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.0017216140404343605,
"learning_rate": 2.7500000000000004e-06,
"loss": -0.0,
"num_tokens": 2576978.0,
"reward": 0.015625,
"reward_std": 0.036563027650117874,
"rewards/accuracy_reward_step": 0.015625,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.0,
"rewards/stepwise_brier_reward": 0.0,
"step": 11
},
{
"calib/answer_extract_rate": 0.046875,
"calib/auroc": 0.3125,
"calib/avg_num_step_conf": 0.07421875,
"calib/ece": 0.7233333333333334,
"calib/final_conf_rate": 0.0234375,
"calib/format_rate": 0.01953125,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": -0.21999999999999997,
"calib/mean_conf": 0.8766666666666666,
"calib/mu_c": 0.73,
"calib/mu_w": 0.95,
"calib/nonempty_final_conf_rate": 0.0234375,
"calib/nonempty_reasoning_rate": 0.05859375,
"calib/nonempty_step_conf_rate": 0.03125,
"calib/pce": 0.6333333333333333,
"calib/std_conf": 0.20013884069704097,
"calib/step_conf_rate": 0.03125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 2974.0,
"completions/max_terminated_length": 2974.0,
"completions/mean_length": 713.0390625,
"completions/mean_terminated_length": 770.2025146484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.0128,
"grad_norm": 0.002639840357005596,
"learning_rate": 3e-06,
"loss": -0.0007,
"num_tokens": 2813260.0,
"reward": 0.021190494298934937,
"reward_std": 0.05565603822469711,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.008079687133431435,
"rewards/format_reward_step": 0.01953125,
"rewards/stepwise_brier_reward": 0.013915101066231728,
"step": 12
},
{
"calib/answer_extract_rate": 0.05078125,
"calib/auroc": 0.4375,
"calib/avg_num_step_conf": 0.0625,
"calib/ece": 0.595,
"calib/final_conf_rate": 0.0234375,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": -0.01749999999999996,
"calib/mean_conf": 0.7616666666666667,
"calib/mu_c": 0.75,
"calib/mu_w": 0.7675,
"calib/nonempty_final_conf_rate": 0.0234375,
"calib/nonempty_reasoning_rate": 0.0625,
"calib/nonempty_step_conf_rate": 0.0234375,
"calib/pce": 0.5116666666666667,
"calib/std_conf": 0.35918503433312593,
"calib/step_conf_rate": 0.0234375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3006.0,
"completions/max_terminated_length": 3006.0,
"completions/mean_length": 714.85546875,
"completions/mean_terminated_length": 778.7361450195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.0011884482810273767,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.0052,
"num_tokens": 3050423.0,
"reward": 0.01173363346606493,
"reward_std": 0.03318772464990616,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.00390625,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.0031845346093177795,
"step": 13
},
{
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.38888888888888884,
"calib/avg_num_step_conf": 0.07421875,
"calib/ece": 0.7185185185185186,
"calib/final_conf_rate": 0.03515625,
"calib/format_rate": 0.0,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": -0.188888888888889,
"calib/mean_conf": 0.9037037037037037,
"calib/mu_c": 0.7777777777777777,
"calib/mu_w": 0.9666666666666667,
"calib/nonempty_final_conf_rate": 0.03515625,
"calib/nonempty_reasoning_rate": 0.078125,
"calib/nonempty_step_conf_rate": 0.0234375,
"calib/pce": 0.6444444444444445,
"calib/std_conf": 0.21107861998185198,
"calib/step_conf_rate": 0.0234375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2902.0,
"completions/max_terminated_length": 2902.0,
"completions/mean_length": 740.328125,
"completions/mean_terminated_length": 824.017333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.0018966062925755978,
"learning_rate": 3.5e-06,
"loss": -0.0005,
"num_tokens": 3294915.0,
"reward": 0.01171875,
"reward_std": 0.03314562886953354,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.0,
"rewards/format_reward_step": 0.0,
"rewards/stepwise_brier_reward": 0.0,
"step": 14
},
{
"calib/answer_extract_rate": 0.03125,
"calib/auroc": 0.6666666666666667,
"calib/avg_num_step_conf": 0.01953125,
"calib/ece": 0.6875,
"calib/final_conf_rate": 0.015625,
"calib/format_rate": 0.0078125,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": 0.08333333333333337,
"calib/mean_conf": 0.9375,
"calib/mu_c": 1.0,
"calib/mu_w": 0.9166666666666666,
"calib/nonempty_final_conf_rate": 0.015625,
"calib/nonempty_reasoning_rate": 0.03515625,
"calib/nonempty_step_conf_rate": 0.01171875,
"calib/pce": 0.6875,
"calib/std_conf": 0.10825317547305482,
"calib/step_conf_rate": 0.01171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 2984.0,
"completions/max_terminated_length": 2984.0,
"completions/mean_length": 678.6796875,
"completions/mean_terminated_length": 752.1298828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.016,
"grad_norm": 0.0028262899722903967,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.0127,
"num_tokens": 3526105.0,
"reward": 0.007777903228998184,
"reward_std": 0.021999232470989227,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.001708984375,
"rewards/format_reward_step": 0.0078125,
"rewards/stepwise_brier_reward": 0.0026936442591249943,
"step": 15
},
{
"calib/answer_extract_rate": 0.05078125,
"calib/auroc": 0.16666666666666666,
"calib/avg_num_step_conf": 0.01953125,
"calib/ece": 0.48,
"calib/final_conf_rate": 0.01953125,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 0.6,
"calib/gap": -0.20000000000000007,
"calib/mean_conf": 0.8800000000000001,
"calib/mu_c": 0.7999999999999999,
"calib/mu_w": 1.0,
"calib/nonempty_final_conf_rate": 0.01953125,
"calib/nonempty_reasoning_rate": 0.05078125,
"calib/nonempty_step_conf_rate": 0.00390625,
"calib/pce": 0.38,
"calib/std_conf": 0.19390719429665315,
"calib/step_conf_rate": 0.00390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 779.78515625,
"completions/mean_terminated_length": 853.0983276367188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.002077557845041156,
"learning_rate": 4.000000000000001e-06,
"loss": -0.0057,
"num_tokens": 3784146.0,
"reward": 0.015214828774333,
"reward_std": 0.043034035712480545,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.0029296875,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.0034374422393739223,
"step": 16
},
{
"calib/answer_extract_rate": 0.0546875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 0.03125,
"calib/ece": 0.6283333333333334,
"calib/final_conf_rate": 0.0234375,
"calib/format_rate": 0.0078125,
"calib/frac_conf_gt_0.9": 0.6666666666666666,
"calib/gap": 0.020000000000000018,
"calib/mean_conf": 0.9116666666666666,
"calib/mu_c": 0.925,
"calib/mu_w": 0.905,
"calib/nonempty_final_conf_rate": 0.0234375,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.01953125,
"calib/pce": 0.6033333333333334,
"calib/std_conf": 0.12047360245667466,
"calib/step_conf_rate": 0.01953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.07421875,
"completions/max_length": 2962.0,
"completions/max_terminated_length": 2962.0,
"completions/mean_length": 717.52734375,
"completions/mean_terminated_length": 775.0505981445312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.002145631704479456,
"learning_rate": 4.25e-06,
"loss": 0.0017,
"num_tokens": 4020929.0,
"reward": 0.011951509863138199,
"reward_std": 0.026172826066613197,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.002152734436094761,
"rewards/format_reward_step": 0.0078125,
"rewards/stepwise_brier_reward": 0.002875569509342313,
"step": 17
},
{
"calib/answer_extract_rate": 0.03515625,
"calib/auroc": 0.6666666666666667,
"calib/avg_num_step_conf": 0.01953125,
"calib/ece": 0.5182293672839506,
"calib/final_conf_rate": 0.015625,
"calib/format_rate": 0.00390625,
"calib/frac_conf_gt_0.9": 0.75,
"calib/gap": 0.30902751028806585,
"calib/mean_conf": 0.7682293672839506,
"calib/mu_c": 1.0,
"calib/mu_w": 0.6909724897119341,
"calib/nonempty_final_conf_rate": 0.015625,
"calib/nonempty_reasoning_rate": 0.04296875,
"calib/nonempty_step_conf_rate": 0.01171875,
"calib/pce": 0.5182293672839506,
"calib/std_conf": 0.401438511566583,
"calib/step_conf_rate": 0.01171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2947.0,
"completions/max_terminated_length": 2947.0,
"completions/mean_length": 765.953125,
"completions/mean_terminated_length": 834.3999633789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0192,
"grad_norm": 0.001130632241256535,
"learning_rate": 4.5e-06,
"loss": 0.0129,
"num_tokens": 4277301.0,
"reward": 0.007290839217603207,
"reward_std": 0.020621608942747116,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.0038854805752635,
"rewards/format_reward_step": 0.00390625,
"rewards/stepwise_brier_reward": 0.0010798965813592076,
"step": 18
},
{
"calib/answer_extract_rate": 0.05859375,
"calib/auroc": 0.75,
"calib/avg_num_step_conf": 0.02734375,
"calib/ece": 0.648899549171724,
"calib/final_conf_rate": 0.04296875,
"calib/format_rate": 0.0078125,
"calib/frac_conf_gt_0.9": 0.6363636363636364,
"calib/gap": 0.2862104959111036,
"calib/mean_conf": 0.7398086400808149,
"calib/mu_c": 1.0,
"calib/mu_w": 0.7137895040888964,
"calib/nonempty_final_conf_rate": 0.04296875,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.015625,
"calib/pce": 0.648899549171724,
"calib/std_conf": 0.3887335985193895,
"calib/step_conf_rate": 0.015625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 3034.0,
"completions/max_terminated_length": 3034.0,
"completions/mean_length": 654.9296875,
"completions/mean_terminated_length": 722.6810302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.00234220246784389,
"learning_rate": 4.75e-06,
"loss": 0.0088,
"num_tokens": 4499291.0,
"reward": 0.009577165357768536,
"reward_std": 0.027088314294815063,
"rewards/accuracy_reward_step": 0.00390625,
"rewards/final_brier_reward_step": 0.004096976947039366,
"rewards/format_reward_step": 0.0078125,
"rewards/stepwise_brier_reward": 0.005114707630127668,
"step": 19
},
{
"calib/answer_extract_rate": 0.09765625,
"calib/auroc": 0.7333333333333334,
"calib/avg_num_step_conf": 0.078125,
"calib/ece": 0.3563636363636363,
"calib/final_conf_rate": 0.04296875,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.6363636363636364,
"calib/gap": 0.21400000000000008,
"calib/mean_conf": 0.8127272727272729,
"calib/mu_c": 0.91,
"calib/mu_w": 0.696,
"calib/nonempty_final_conf_rate": 0.04296875,
"calib/nonempty_reasoning_rate": 0.10546875,
"calib/nonempty_step_conf_rate": 0.03515625,
"calib/pce": 0.3118181818181818,
"calib/std_conf": 0.30558709245382104,
"calib/step_conf_rate": 0.03515625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 3016.0,
"completions/max_terminated_length": 3016.0,
"completions/mean_length": 636.19140625,
"completions/mean_terminated_length": 705.0432739257812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.003776842262595892,
"learning_rate": 5e-06,
"loss": 0.0404,
"num_tokens": 4716596.0,
"reward": 0.05176505446434021,
"reward_std": 0.1317472606897354,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.01792929694056511,
"rewards/format_reward_step": 0.0234375,
"rewards/stepwise_brier_reward": 0.018076637759804726,
"step": 20
},
{
"calib/answer_extract_rate": 0.09765625,
"calib/auroc": 0.65,
"calib/avg_num_step_conf": 0.14453125,
"calib/ece": 0.5698058823529412,
"calib/final_conf_rate": 0.06640625,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.6470588235294118,
"calib/gap": 0.12194166666666661,
"calib/mean_conf": 0.8639235294117648,
"calib/mu_c": 0.95,
"calib/mu_w": 0.8280583333333333,
"calib/nonempty_final_conf_rate": 0.06640625,
"calib/nonempty_reasoning_rate": 0.1171875,
"calib/nonempty_step_conf_rate": 0.05078125,
"calib/pce": 0.5698058823529412,
"calib/std_conf": 0.21797371544027358,
"calib/step_conf_rate": 0.05078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 2776.0,
"completions/max_terminated_length": 2776.0,
"completions/mean_length": 708.9921875,
"completions/mean_terminated_length": 762.6134643554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 10.0,
"epoch": 0.0224,
"grad_norm": 0.0033623469062149525,
"learning_rate": 4.9722222222222224e-06,
"loss": -0.0056,
"num_tokens": 4950626.0,
"reward": 0.036954864859580994,
"reward_std": 0.10452413558959961,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.011474609375,
"rewards/format_reward_step": 0.0234375,
"rewards/stepwise_brier_reward": 0.01862023025751114,
"step": 21
},
{
"calib/answer_extract_rate": 0.10546875,
"calib/auroc": 0.52,
"calib/avg_num_step_conf": 0.125,
"calib/ece": 0.5760666666666666,
"calib/final_conf_rate": 0.05859375,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.6,
"calib/gap": 0.0049000000000001265,
"calib/mean_conf": 0.8427333333333333,
"calib/mu_c": 0.8460000000000001,
"calib/mu_w": 0.8411,
"calib/nonempty_final_conf_rate": 0.05859375,
"calib/nonempty_reasoning_rate": 0.125,
"calib/nonempty_step_conf_rate": 0.0625,
"calib/pce": 0.5427333333333333,
"calib/std_conf": 0.2662591386016429,
"calib/step_conf_rate": 0.0625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 2857.0,
"completions/max_terminated_length": 2857.0,
"completions/mean_length": 667.05078125,
"completions/mean_terminated_length": 736.0560302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.0026941606774926186,
"learning_rate": 4.944444444444445e-06,
"loss": 0.0104,
"num_tokens": 5172775.0,
"reward": 0.04088283330202103,
"reward_std": 0.10474004596471786,
"rewards/accuracy_reward_step": 0.0234375,
"rewards/final_brier_reward_step": 0.012176171876490116,
"rewards/format_reward_step": 0.0234375,
"rewards/stepwise_brier_reward": 0.01730399578809738,
"step": 22
},
{
"calib/answer_extract_rate": 0.12890625,
"calib/auroc": 0.696078431372549,
"calib/avg_num_step_conf": 0.16796875,
"calib/ece": 0.5299442586399108,
"calib/final_conf_rate": 0.08984375,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.6086956521739131,
"calib/gap": 0.2491930618401207,
"calib/mean_conf": 0.790813823857302,
"calib/mu_c": 0.975,
"calib/mu_w": 0.7258069381598793,
"calib/nonempty_final_conf_rate": 0.08984375,
"calib/nonempty_reasoning_rate": 0.15625,
"calib/nonempty_step_conf_rate": 0.08203125,
"calib/pce": 0.5299442586399108,
"calib/std_conf": 0.3417305783731682,
"calib/step_conf_rate": 0.08203125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3054.0,
"completions/max_terminated_length": 3054.0,
"completions/mean_length": 682.4453125,
"completions/mean_terminated_length": 740.2796630859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.002465737285092473,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.0166,
"num_tokens": 5400985.0,
"reward": 0.05742061138153076,
"reward_std": 0.1423000991344452,
"rewards/accuracy_reward_step": 0.03125,
"rewards/final_brier_reward_step": 0.02013828232884407,
"rewards/format_reward_step": 0.03515625,
"rewards/stepwise_brier_reward": 0.022218381986021996,
"step": 23
},
{
"calib/answer_extract_rate": 0.1328125,
"calib/auroc": 0.625,
"calib/avg_num_step_conf": 0.1484375,
"calib/ece": 0.46157368421052625,
"calib/final_conf_rate": 0.07421875,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.47368421052631576,
"calib/gap": 0.10405595238095233,
"calib/mean_conf": 0.8299947368421051,
"calib/mu_c": 0.8957142857142857,
"calib/mu_w": 0.7916583333333334,
"calib/nonempty_final_conf_rate": 0.07421875,
"calib/nonempty_reasoning_rate": 0.1484375,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.46157368421052625,
"calib/std_conf": 0.23670612575997937,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04296875,
"completions/max_length": 3003.0,
"completions/max_terminated_length": 3003.0,
"completions/mean_length": 681.9296875,
"completions/mean_terminated_length": 712.5469360351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0256,
"grad_norm": 0.0032444254029542208,
"learning_rate": 4.888888888888889e-06,
"loss": -0.0057,
"num_tokens": 5629639.0,
"reward": 0.05443684384226799,
"reward_std": 0.13633695244789124,
"rewards/accuracy_reward_step": 0.02734375,
"rewards/final_brier_reward_step": 0.02127421647310257,
"rewards/format_reward_step": 0.03515625,
"rewards/stepwise_brier_reward": 0.02363644167780876,
"step": 24
},
{
"calib/answer_extract_rate": 0.20703125,
"calib/auroc": 0.6169950738916257,
"calib/avg_num_step_conf": 0.3125,
"calib/ece": 0.4677523200170542,
"calib/final_conf_rate": 0.16796875,
"calib/format_rate": 0.09375,
"calib/frac_conf_gt_0.9": 0.46511627906976744,
"calib/gap": 0.14476133829983573,
"calib/mean_conf": 0.6937988316449611,
"calib/mu_c": 0.7914285714285713,
"calib/mu_w": 0.6466672331287355,
"calib/nonempty_final_conf_rate": 0.16796875,
"calib/nonempty_reasoning_rate": 0.25,
"calib/nonempty_step_conf_rate": 0.14453125,
"calib/pce": 0.4179848781565891,
"calib/std_conf": 0.38119765456314086,
"calib/step_conf_rate": 0.14453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2873.0,
"completions/max_terminated_length": 2873.0,
"completions/mean_length": 641.03515625,
"completions/mean_terminated_length": 698.3191528320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.004970417357981205,
"learning_rate": 4.861111111111111e-06,
"loss": 0.007,
"num_tokens": 5846536.0,
"reward": 0.12949004769325256,
"reward_std": 0.21166762709617615,
"rewards/accuracy_reward_step": 0.05859375,
"rewards/final_brier_reward_step": 0.050851866602897644,
"rewards/format_reward_step": 0.09375,
"rewards/stepwise_brier_reward": 0.06938145309686661,
"step": 25
},
{
"calib/answer_extract_rate": 0.3046875,
"calib/auroc": 0.6566666666666666,
"calib/avg_num_step_conf": 0.625,
"calib/ece": 0.608535467579016,
"calib/final_conf_rate": 0.2421875,
"calib/format_rate": 0.1328125,
"calib/frac_conf_gt_0.9": 0.532258064516129,
"calib/gap": 0.13554531313131324,
"calib/mean_conf": 0.774022596937113,
"calib/mu_c": 0.8833333333333334,
"calib/mu_w": 0.7477880202020202,
"calib/nonempty_final_conf_rate": 0.2421875,
"calib/nonempty_reasoning_rate": 0.36328125,
"calib/nonempty_step_conf_rate": 0.19921875,
"calib/pce": 0.5945048387096774,
"calib/std_conf": 0.31876347751671763,
"calib/step_conf_rate": 0.19921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05859375,
"completions/max_length": 2741.0,
"completions/max_terminated_length": 2741.0,
"completions/mean_length": 634.7421875,
"completions/mean_terminated_length": 674.2490234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.005587348248809576,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0142,
"num_tokens": 6063838.0,
"reward": 0.14023250341415405,
"reward_std": 0.27342501282691956,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.05323883146047592,
"rewards/format_reward_step": 0.1328125,
"rewards/stepwise_brier_reward": 0.0919523760676384,
"step": 26
},
{
"calib/answer_extract_rate": 0.2890625,
"calib/auroc": 0.6232558139534884,
"calib/avg_num_step_conf": 0.9453125,
"calib/ece": 0.7037980817610064,
"calib/final_conf_rate": 0.20703125,
"calib/format_rate": 0.1015625,
"calib/frac_conf_gt_0.9": 0.7358490566037735,
"calib/gap": 0.06330410852713164,
"calib/mean_conf": 0.8585150628930819,
"calib/mu_c": 0.9098749999999999,
"calib/mu_w": 0.8465708914728682,
"calib/nonempty_final_conf_rate": 0.20703125,
"calib/nonempty_reasoning_rate": 0.3359375,
"calib/nonempty_step_conf_rate": 0.1953125,
"calib/pce": 0.6868169496855348,
"calib/std_conf": 0.2723051982083245,
"calib/step_conf_rate": 0.1953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 3047.0,
"completions/max_terminated_length": 3047.0,
"completions/mean_length": 632.15234375,
"completions/mean_terminated_length": 674.2958984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 12.0,
"epoch": 0.0288,
"grad_norm": 0.005046192090958357,
"learning_rate": 4.805555555555556e-06,
"loss": 0.0392,
"num_tokens": 6280453.0,
"reward": 0.1070917472243309,
"reward_std": 0.2380264401435852,
"rewards/accuracy_reward_step": 0.05078125,
"rewards/final_brier_reward_step": 0.023514632135629654,
"rewards/format_reward_step": 0.1015625,
"rewards/stepwise_brier_reward": 0.056337736546993256,
"step": 27
},
{
"calib/answer_extract_rate": 0.3515625,
"calib/auroc": 0.5287569573283859,
"calib/avg_num_step_conf": 0.625,
"calib/ece": 0.543011719700939,
"calib/final_conf_rate": 0.27734375,
"calib/format_rate": 0.16796875,
"calib/frac_conf_gt_0.9": 0.5492957746478874,
"calib/gap": 0.0214253807919913,
"calib/mean_conf": 0.7671150408624413,
"calib/mu_c": 0.7819015712681818,
"calib/mu_w": 0.7604761904761905,
"calib/nonempty_final_conf_rate": 0.27734375,
"calib/nonempty_reasoning_rate": 0.41796875,
"calib/nonempty_step_conf_rate": 0.24609375,
"calib/pce": 0.5001338028169013,
"calib/std_conf": 0.3338678281776153,
"calib/step_conf_rate": 0.24609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2964.0,
"completions/max_terminated_length": 2964.0,
"completions/mean_length": 663.3046875,
"completions/mean_terminated_length": 690.2682495117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.004129378125071526,
"learning_rate": 4.777777777777778e-06,
"loss": 0.0671,
"num_tokens": 6506771.0,
"reward": 0.21467575430870056,
"reward_std": 0.39510178565979004,
"rewards/accuracy_reward_step": 0.09375,
"rewards/final_brier_reward_step": 0.08527621626853943,
"rewards/format_reward_step": 0.16796875,
"rewards/stepwise_brier_reward": 0.11158812046051025,
"step": 28
},
{
"calib/answer_extract_rate": 0.41015625,
"calib/auroc": 0.5290178571428572,
"calib/avg_num_step_conf": 0.8671875,
"calib/ece": 0.6497173372093024,
"calib/final_conf_rate": 0.3359375,
"calib/format_rate": 0.1796875,
"calib/frac_conf_gt_0.9": 0.6395348837209303,
"calib/gap": 0.1153294142857143,
"calib/mean_conf": 0.8192522209302324,
"calib/mu_c": 0.913125,
"calib/mu_w": 0.7977955857142857,
"calib/nonempty_final_conf_rate": 0.3359375,
"calib/nonempty_reasoning_rate": 0.484375,
"calib/nonempty_step_conf_rate": 0.26953125,
"calib/pce": 0.641461523255814,
"calib/std_conf": 0.30291014471554223,
"calib/step_conf_rate": 0.26953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03515625,
"completions/max_length": 2969.0,
"completions/max_terminated_length": 2969.0,
"completions/mean_length": 624.68359375,
"completions/mean_terminated_length": 647.4453735351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 1.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.005222437437623739,
"learning_rate": 4.75e-06,
"loss": -0.0177,
"num_tokens": 6723386.0,
"reward": 0.2006719410419464,
"reward_std": 0.3653067946434021,
"rewards/accuracy_reward_step": 0.07421875,
"rewards/final_brier_reward_step": 0.08126065135002136,
"rewards/format_reward_step": 0.1796875,
"rewards/stepwise_brier_reward": 0.12766645848751068,
"step": 29
},
{
"calib/answer_extract_rate": 0.44921875,
"calib/auroc": 0.5370705244122966,
"calib/avg_num_step_conf": 1.7109375,
"calib/ece": 0.621134,
"calib/final_conf_rate": 0.390625,
"calib/format_rate": 0.234375,
"calib/frac_conf_gt_0.9": 0.56,
"calib/gap": 0.06032754671488849,
"calib/mean_conf": 0.794246,
"calib/mu_c": 0.8419047619047619,
"calib/mu_w": 0.7815772151898734,
"calib/nonempty_final_conf_rate": 0.390625,
"calib/nonempty_reasoning_rate": 0.55078125,
"calib/nonempty_step_conf_rate": 0.36328125,
"calib/pce": 0.60269,
"calib/std_conf": 0.3094254919750472,
"calib/step_conf_rate": 0.36328125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 2915.0,
"completions/max_terminated_length": 2915.0,
"completions/mean_length": 660.0,
"completions/mean_terminated_length": 681.290283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 45.0,
"epoch": 0.032,
"grad_norm": 0.005847262218594551,
"learning_rate": 4.722222222222222e-06,
"loss": 0.0274,
"num_tokens": 6948898.0,
"reward": 0.24672269821166992,
"reward_std": 0.4171825647354126,
"rewards/accuracy_reward_step": 0.09375,
"rewards/final_brier_reward_step": 0.09664709866046906,
"rewards/format_reward_step": 0.234375,
"rewards/stepwise_brier_reward": 0.13734658062458038,
"step": 30
},
{
"calib/answer_extract_rate": 0.5546875,
"calib/auroc": 0.5846328784925277,
"calib/avg_num_step_conf": 1.4921875,
"calib/ece": 0.5767189827800069,
"calib/final_conf_rate": 0.55078125,
"calib/format_rate": 0.3203125,
"calib/frac_conf_gt_0.9": 0.475177304964539,
"calib/gap": 0.11232585965396824,
"calib/mean_conf": 0.7452722364027018,
"calib/mu_c": 0.8360888888888889,
"calib/mu_w": 0.7237630292349206,
"calib/nonempty_final_conf_rate": 0.55078125,
"calib/nonempty_reasoning_rate": 0.640625,
"calib/nonempty_step_conf_rate": 0.4296875,
"calib/pce": 0.5652509287402906,
"calib/std_conf": 0.3307988490229472,
"calib/step_conf_rate": 0.4296875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2997.0,
"completions/max_terminated_length": 2997.0,
"completions/mean_length": 638.125,
"completions/mean_terminated_length": 648.2540283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.004652371630072594,
"learning_rate": 4.694444444444445e-06,
"loss": 0.0786,
"num_tokens": 7167738.0,
"reward": 0.32629090547561646,
"reward_std": 0.43779438734054565,
"rewards/accuracy_reward_step": 0.109375,
"rewards/final_brier_reward_step": 0.14228935539722443,
"rewards/format_reward_step": 0.3203125,
"rewards/stepwise_brier_reward": 0.1987099051475525,
"step": 31
},
{
"calib/answer_extract_rate": 0.5859375,
"calib/auroc": 0.5058571428571428,
"calib/avg_num_step_conf": 1.4765625,
"calib/ece": 0.5549560987654322,
"calib/final_conf_rate": 0.52734375,
"calib/format_rate": 0.328125,
"calib/frac_conf_gt_0.9": 0.4666666666666667,
"calib/gap": -0.020848761904761925,
"calib/mean_conf": 0.7469080987654321,
"calib/mu_c": 0.7314645714285715,
"calib/mu_w": 0.7523133333333334,
"calib/nonempty_final_conf_rate": 0.52734375,
"calib/nonempty_reasoning_rate": 0.67578125,
"calib/nonempty_step_conf_rate": 0.4609375,
"calib/pce": 0.5213024691358026,
"calib/std_conf": 0.32492967651162474,
"calib/step_conf_rate": 0.4609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2863.0,
"completions/max_terminated_length": 2863.0,
"completions/mean_length": 560.6875,
"completions/mean_terminated_length": 569.5873413085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 11.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.005850342568010092,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0303,
"num_tokens": 7367546.0,
"reward": 0.38101819157600403,
"reward_std": 0.45329520106315613,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.1564299762248993,
"rewards/format_reward_step": 0.328125,
"rewards/stepwise_brier_reward": 0.19246289134025574,
"step": 32
},
{
"calib/answer_extract_rate": 0.69921875,
"calib/auroc": 0.550956156716418,
"calib/avg_num_step_conf": 1.8984375,
"calib/ece": 0.6287451464439183,
"calib/final_conf_rate": 0.6484375,
"calib/format_rate": 0.45703125,
"calib/frac_conf_gt_0.9": 0.5,
"calib/gap": 0.0035832961963400134,
"calib/mean_conf": 0.7702324596969303,
"calib/mu_c": 0.773125,
"calib/mu_w": 0.7695417038036599,
"calib/nonempty_final_conf_rate": 0.6484375,
"calib/nonempty_reasoning_rate": 0.76171875,
"calib/nonempty_step_conf_rate": 0.55859375,
"calib/pce": 0.6031032609017496,
"calib/std_conf": 0.3125446232054246,
"calib/step_conf_rate": 0.55859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2298.0,
"completions/max_terminated_length": 2298.0,
"completions/mean_length": 536.46484375,
"completions/mean_terminated_length": 544.9801635742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 27.0,
"epoch": 0.0352,
"grad_norm": 0.005888388957828283,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0636,
"num_tokens": 7561321.0,
"reward": 0.4416656196117401,
"reward_std": 0.5061711668968201,
"rewards/accuracy_reward_step": 0.1328125,
"rewards/final_brier_reward_step": 0.19741714000701904,
"rewards/format_reward_step": 0.45703125,
"rewards/stepwise_brier_reward": 0.2921407222747803,
"step": 33
},
{
"calib/answer_extract_rate": 0.76171875,
"calib/auroc": 0.5271682340647859,
"calib/avg_num_step_conf": 2.63671875,
"calib/ece": 0.6528921370344805,
"calib/final_conf_rate": 0.7578125,
"calib/format_rate": 0.48046875,
"calib/frac_conf_gt_0.9": 0.5257731958762887,
"calib/gap": 0.048052003561966816,
"calib/mean_conf": 0.7936137865190167,
"calib/mu_c": 0.8344827586206895,
"calib/mu_w": 0.7864307550587227,
"calib/nonempty_final_conf_rate": 0.7578125,
"calib/nonempty_reasoning_rate": 0.8828125,
"calib/nonempty_step_conf_rate": 0.6484375,
"calib/pce": 0.6485106937355114,
"calib/std_conf": 0.2976252390781631,
"calib/step_conf_rate": 0.6484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2677.0,
"completions/max_terminated_length": 2677.0,
"completions/mean_length": 488.46875,
"completions/mean_terminated_length": 490.38433837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 27.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.006886497605592012,
"learning_rate": 4.611111111111112e-06,
"loss": -0.0012,
"num_tokens": 7741049.0,
"reward": 0.44409021735191345,
"reward_std": 0.523621678352356,
"rewards/accuracy_reward_step": 0.12109375,
"rewards/final_brier_reward_step": 0.19314272701740265,
"rewards/format_reward_step": 0.48046875,
"rewards/stepwise_brier_reward": 0.329137921333313,
"step": 34
},
{
"calib/answer_extract_rate": 0.7578125,
"calib/auroc": 0.5019651401230348,
"calib/avg_num_step_conf": 3.12890625,
"calib/ece": 0.5560118690291648,
"calib/final_conf_rate": 0.75,
"calib/format_rate": 0.48046875,
"calib/frac_conf_gt_0.9": 0.40625,
"calib/gap": 0.0016982368679209214,
"calib/mean_conf": 0.712848398827978,
"calib/mu_c": 0.7142105263157895,
"calib/mu_w": 0.7125122894478686,
"calib/nonempty_final_conf_rate": 0.75,
"calib/nonempty_reasoning_rate": 0.8359375,
"calib/nonempty_step_conf_rate": 0.60546875,
"calib/pce": 0.535471800595238,
"calib/std_conf": 0.3321984379710554,
"calib/step_conf_rate": 0.60546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2629.0,
"completions/max_terminated_length": 2629.0,
"completions/mean_length": 545.8203125,
"completions/mean_terminated_length": 550.1181030273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 23.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.005417841020971537,
"learning_rate": 4.583333333333333e-06,
"loss": 0.0193,
"num_tokens": 7939603.0,
"reward": 0.48744523525238037,
"reward_std": 0.5235015153884888,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.21776510775089264,
"rewards/format_reward_step": 0.48046875,
"rewards/stepwise_brier_reward": 0.32831326127052307,
"step": 35
},
{
"calib/answer_extract_rate": 0.8203125,
"calib/auroc": 0.4933014354066986,
"calib/avg_num_step_conf": 2.86328125,
"calib/ece": 0.5209496376811605,
"calib/final_conf_rate": 0.80859375,
"calib/format_rate": 0.6171875,
"calib/frac_conf_gt_0.9": 0.3961352657004831,
"calib/gap": -0.03546680622009957,
"calib/mean_conf": 0.6958955314009672,
"calib/mu_c": 0.669852272727271,
"calib/mu_w": 0.7053190789473706,
"calib/nonempty_final_conf_rate": 0.80859375,
"calib/nonempty_reasoning_rate": 0.90234375,
"calib/nonempty_step_conf_rate": 0.71875,
"calib/pce": 0.47557234299517026,
"calib/std_conf": 0.3317535883627056,
"calib/step_conf_rate": 0.71875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1874.0,
"completions/max_terminated_length": 1874.0,
"completions/mean_length": 446.47265625,
"completions/mean_terminated_length": 448.2235412597656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.0384,
"grad_norm": 0.006148908287286758,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0568,
"num_tokens": 8106180.0,
"reward": 0.6918026208877563,
"reward_std": 0.6030615568161011,
"rewards/accuracy_reward_step": 0.23046875,
"rewards/final_brier_reward_step": 0.32322216033935547,
"rewards/format_reward_step": 0.6171875,
"rewards/stepwise_brier_reward": 0.45826616883277893,
"step": 36
},
{
"calib/answer_extract_rate": 0.81640625,
"calib/auroc": 0.5715029761904762,
"calib/avg_num_step_conf": 2.6953125,
"calib/ece": 0.5662541586057692,
"calib/final_conf_rate": 0.8125,
"calib/format_rate": 0.57421875,
"calib/frac_conf_gt_0.9": 0.40384615384615385,
"calib/gap": 0.057586398869047595,
"calib/mean_conf": 0.7308629086057692,
"calib/mu_c": 0.7773749999999999,
"calib/mu_w": 0.7197886011309523,
"calib/nonempty_final_conf_rate": 0.8125,
"calib/nonempty_reasoning_rate": 0.90234375,
"calib/nonempty_step_conf_rate": 0.6875,
"calib/pce": 0.5524046874519231,
"calib/std_conf": 0.30629556282873216,
"calib/step_conf_rate": 0.6875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2216.0,
"completions/max_terminated_length": 2216.0,
"completions/mean_length": 478.390625,
"completions/mean_terminated_length": 480.2666931152344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 10.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.0059788888320326805,
"learning_rate": 4.527777777777778e-06,
"loss": -0.0013,
"num_tokens": 8285312.0,
"reward": 0.5644609928131104,
"reward_std": 0.5739938020706177,
"rewards/accuracy_reward_step": 0.16015625,
"rewards/final_brier_reward_step": 0.26444345712661743,
"rewards/format_reward_step": 0.57421875,
"rewards/stepwise_brier_reward": 0.3992696702480316,
"step": 37
},
{
"calib/answer_extract_rate": 0.81640625,
"calib/auroc": 0.5535502958579882,
"calib/avg_num_step_conf": 3.20703125,
"calib/ece": 0.4963351124697266,
"calib/final_conf_rate": 0.81640625,
"calib/format_rate": 0.62890625,
"calib/frac_conf_gt_0.9": 0.31100478468899523,
"calib/gap": 0.06486359794177798,
"calib/mean_conf": 0.6674671794553724,
"calib/mu_c": 0.7199166916666666,
"calib/mu_w": 0.6550530937248886,
"calib/nonempty_final_conf_rate": 0.81640625,
"calib/nonempty_reasoning_rate": 0.92578125,
"calib/nonempty_step_conf_rate": 0.76171875,
"calib/pce": 0.48620736605824333,
"calib/std_conf": 0.32891209907867613,
"calib/step_conf_rate": 0.76171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2375.0,
"completions/max_terminated_length": 2375.0,
"completions/mean_length": 479.21875,
"completions/mean_terminated_length": 484.9012145996094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 35.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.005851101130247116,
"learning_rate": 4.5e-06,
"loss": 0.0499,
"num_tokens": 8464448.0,
"reward": 0.6287873983383179,
"reward_std": 0.5634995102882385,
"rewards/accuracy_reward_step": 0.15625,
"rewards/final_brier_reward_step": 0.338528573513031,
"rewards/format_reward_step": 0.62890625,
"rewards/stepwise_brier_reward": 0.45840492844581604,
"step": 38
},
{
"calib/answer_extract_rate": 0.9140625,
"calib/auroc": 0.522828947368421,
"calib/avg_num_step_conf": 3.29296875,
"calib/ece": 0.5117534932515357,
"calib/final_conf_rate": 0.8984375,
"calib/format_rate": 0.76171875,
"calib/frac_conf_gt_0.9": 0.2826086956521739,
"calib/gap": 0.019911174835860357,
"calib/mean_conf": 0.6628849715124053,
"calib/mu_c": 0.6793333333333333,
"calib/mu_w": 0.659422158497473,
"calib/nonempty_final_conf_rate": 0.8984375,
"calib/nonempty_reasoning_rate": 0.96875,
"calib/nonempty_step_conf_rate": 0.85546875,
"calib/pce": 0.5003627106428401,
"calib/std_conf": 0.3261157697479711,
"calib/step_conf_rate": 0.85546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1507.0,
"completions/max_terminated_length": 1507.0,
"completions/mean_length": 410.53515625,
"completions/mean_terminated_length": 412.1451110839844,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.0416,
"grad_norm": 0.006558020133525133,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0296,
"num_tokens": 8625201.0,
"reward": 0.73180091381073,
"reward_std": 0.47666364908218384,
"rewards/accuracy_reward_step": 0.16796875,
"rewards/final_brier_reward_step": 0.391775906085968,
"rewards/format_reward_step": 0.76171875,
"rewards/stepwise_brier_reward": 0.5577144622802734,
"step": 39
},
{
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5290841584158416,
"calib/avg_num_step_conf": 3.2890625,
"calib/ece": 0.5333832492997199,
"calib/final_conf_rate": 0.9296875,
"calib/format_rate": 0.8125,
"calib/frac_conf_gt_0.9": 0.25630252100840334,
"calib/gap": 0.03175771727172716,
"calib/mean_conf": 0.6602765266106443,
"calib/mu_c": 0.6872305555555555,
"calib/mu_w": 0.6554728382838283,
"calib/nonempty_final_conf_rate": 0.9296875,
"calib/nonempty_reasoning_rate": 0.96875,
"calib/nonempty_step_conf_rate": 0.8671875,
"calib/pce": 0.5211996358543418,
"calib/std_conf": 0.3150771534789929,
"calib/step_conf_rate": 0.8671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1828.0,
"completions/max_terminated_length": 1828.0,
"completions/mean_length": 442.69921875,
"completions/mean_terminated_length": 444.4353332519531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 48.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.00583281833678484,
"learning_rate": 4.444444444444444e-06,
"loss": -0.0175,
"num_tokens": 8794860.0,
"reward": 0.7500452399253845,
"reward_std": 0.5234329700469971,
"rewards/accuracy_reward_step": 0.14453125,
"rewards/final_brier_reward_step": 0.4331750273704529,
"rewards/format_reward_step": 0.8125,
"rewards/stepwise_brier_reward": 0.5807058811187744,
"step": 40
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.534021978021978,
"calib/avg_num_step_conf": 3.30078125,
"calib/ece": 0.44919504960317475,
"calib/final_conf_rate": 0.9375,
"calib/format_rate": 0.8203125,
"calib/frac_conf_gt_0.9": 0.2791666666666667,
"calib/gap": 0.03207007116692839,
"calib/mean_conf": 0.6562501884920634,
"calib/mu_c": 0.6796346153846154,
"calib/mu_w": 0.647564544217687,
"calib/nonempty_final_conf_rate": 0.9375,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.87890625,
"calib/pce": 0.41730595238095247,
"calib/std_conf": 0.32355005709777834,
"calib/step_conf_rate": 0.87890625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1536.0,
"completions/mean_length": 418.36328125,
"completions/mean_terminated_length": 420.0039367675781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.006292893085628748,
"learning_rate": 4.416666666666667e-06,
"loss": 0.0023,
"num_tokens": 8958777.0,
"reward": 0.8849216103553772,
"reward_std": 0.5418712496757507,
"rewards/accuracy_reward_step": 0.2578125,
"rewards/final_brier_reward_step": 0.46316659450531006,
"rewards/format_reward_step": 0.8203125,
"rewards/stepwise_brier_reward": 0.597728431224823,
"step": 41
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.6087782721542454,
"calib/avg_num_step_conf": 3.39453125,
"calib/ece": 0.405187424590164,
"calib/final_conf_rate": 0.953125,
"calib/format_rate": 0.87890625,
"calib/frac_conf_gt_0.9": 0.24180327868852458,
"calib/gap": 0.11658757256210595,
"calib/mean_conf": 0.6234431081967213,
"calib/mu_c": 0.7123172413793103,
"calib/mu_w": 0.5957296688172043,
"calib/nonempty_final_conf_rate": 0.953125,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.93359375,
"calib/pce": 0.3954628073770493,
"calib/std_conf": 0.32575273275629457,
"calib/step_conf_rate": 0.93359375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1634.0,
"completions/max_terminated_length": 1634.0,
"completions/mean_length": 393.47265625,
"completions/mean_terminated_length": 393.47265625,
"completions/min_length": 44.0,
"completions/min_terminated_length": 44.0,
"epoch": 0.0448,
"grad_norm": 0.006545498501509428,
"learning_rate": 4.388888888888889e-06,
"loss": -0.0028,
"num_tokens": 9113442.0,
"reward": 0.9316054582595825,
"reward_std": 0.5143445730209351,
"rewards/accuracy_reward_step": 0.234375,
"rewards/final_brier_reward_step": 0.533115029335022,
"rewards/format_reward_step": 0.87890625,
"rewards/stepwise_brier_reward": 0.6680043935775757,
"step": 42
},
{
"calib/answer_extract_rate": 0.9375,
"calib/auroc": 0.5251258973534769,
"calib/avg_num_step_conf": 3.13671875,
"calib/ece": 0.3751279607492878,
"calib/final_conf_rate": 0.9140625,
"calib/format_rate": 0.85546875,
"calib/frac_conf_gt_0.9": 0.18376068376068377,
"calib/gap": 0.029125472668809604,
"calib/mean_conf": 0.566652974994302,
"calib/mu_c": 0.589430588235294,
"calib/mu_w": 0.5603051155664844,
"calib/nonempty_final_conf_rate": 0.9140625,
"calib/nonempty_reasoning_rate": 0.9609375,
"calib/nonempty_step_conf_rate": 0.91015625,
"calib/pce": 0.3619161088974359,
"calib/std_conf": 0.30766516574131414,
"calib/step_conf_rate": 0.91015625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2194.0,
"completions/max_terminated_length": 2194.0,
"completions/mean_length": 443.0390625,
"completions/mean_terminated_length": 443.0390625,
"completions/min_length": 58.0,
"completions/min_terminated_length": 58.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.006089336704462767,
"learning_rate": 4.361111111111112e-06,
"loss": 0.007,
"num_tokens": 9281652.0,
"reward": 0.903231143951416,
"reward_std": 0.49017244577407837,
"rewards/accuracy_reward_step": 0.203125,
"rewards/final_brier_reward_step": 0.5439672470092773,
"rewards/format_reward_step": 0.85546875,
"rewards/stepwise_brier_reward": 0.6859275102615356,
"step": 43
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5084262380088151,
"calib/avg_num_step_conf": 3.11328125,
"calib/ece": 0.39130325034578145,
"calib/final_conf_rate": 0.94140625,
"calib/format_rate": 0.89453125,
"calib/frac_conf_gt_0.9": 0.15767634854771784,
"calib/gap": 0.011761451041396698,
"calib/mean_conf": 0.5245667358229599,
"calib/mu_c": 0.5344736842105263,
"calib/mu_w": 0.5227122331691296,
"calib/nonempty_final_conf_rate": 0.94140625,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.93359375,
"calib/pce": 0.3790968188105117,
"calib/std_conf": 0.32499933248325935,
"calib/step_conf_rate": 0.93359375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1745.0,
"completions/max_terminated_length": 1745.0,
"completions/mean_length": 456.28515625,
"completions/mean_terminated_length": 458.07452392578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.005765858571976423,
"learning_rate": 4.333333333333334e-06,
"loss": -0.0189,
"num_tokens": 9454349.0,
"reward": 0.8806015253067017,
"reward_std": 0.4581907093524933,
"rewards/accuracy_reward_step": 0.15234375,
"rewards/final_brier_reward_step": 0.5674974322319031,
"rewards/format_reward_step": 0.89453125,
"rewards/stepwise_brier_reward": 0.7045985460281372,
"step": 44
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.48581151832460734,
"calib/avg_num_step_conf": 3.33203125,
"calib/ece": 0.3394438027953701,
"calib/final_conf_rate": 0.94140625,
"calib/format_rate": 0.91015625,
"calib/frac_conf_gt_0.9": 0.08298755186721991,
"calib/gap": -0.013969121206944157,
"calib/mean_conf": 0.4948034127538764,
"calib/mu_c": 0.4837324494736841,
"calib/mu_w": 0.49770157068062826,
"calib/nonempty_final_conf_rate": 0.94140625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.96484375,
"calib/pce": 0.31338916794059835,
"calib/std_conf": 0.3010887186963321,
"calib/step_conf_rate": 0.96484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2407.0,
"completions/max_terminated_length": 2407.0,
"completions/mean_length": 392.328125,
"completions/mean_terminated_length": 392.328125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"epoch": 0.048,
"grad_norm": 0.006556249689310789,
"learning_rate": 4.305555555555556e-06,
"loss": -0.0063,
"num_tokens": 9609401.0,
"reward": 0.9578429460525513,
"reward_std": 0.4669501483440399,
"rewards/accuracy_reward_step": 0.19921875,
"rewards/final_brier_reward_step": 0.6010839939117432,
"rewards/format_reward_step": 0.91015625,
"rewards/stepwise_brier_reward": 0.7401412725448608,
"step": 45
},
{
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.5820621468926553,
"calib/avg_num_step_conf": 3.0390625,
"calib/ece": 0.2903058577405858,
"calib/final_conf_rate": 0.93359375,
"calib/format_rate": 0.8828125,
"calib/frac_conf_gt_0.9": 0.100418410041841,
"calib/gap": 0.09431298493408669,
"calib/mean_conf": 0.4787150627615063,
"calib/mu_c": 0.5497457627118645,
"calib/mu_w": 0.4554327777777778,
"calib/nonempty_final_conf_rate": 0.93359375,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.94140625,
"calib/pce": 0.2610794979079498,
"calib/std_conf": 0.31240829553709604,
"calib/step_conf_rate": 0.94140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2781.0,
"completions/max_terminated_length": 2781.0,
"completions/mean_length": 424.87890625,
"completions/mean_terminated_length": 424.87890625,
"completions/min_length": 43.0,
"completions/min_terminated_length": 43.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.006347313988953829,
"learning_rate": 4.277777777777778e-06,
"loss": 0.016,
"num_tokens": 9772506.0,
"reward": 0.9869669079780579,
"reward_std": 0.4626862406730652,
"rewards/accuracy_reward_step": 0.23046875,
"rewards/final_brier_reward_step": 0.6165705919265747,
"rewards/format_reward_step": 0.8828125,
"rewards/stepwise_brier_reward": 0.7334764003753662,
"step": 46
},
{
"calib/answer_extract_rate": 0.9453125,
"calib/auroc": 0.4998717510259918,
"calib/avg_num_step_conf": 2.8125,
"calib/ece": 0.3017594949537037,
"calib/final_conf_rate": 0.9375,
"calib/format_rate": 0.875,
"calib/frac_conf_gt_0.9": 0.07083333333333333,
"calib/gap": 0.006677570934032595,
"calib/mean_conf": 0.43197911337962963,
"calib/mu_c": 0.436764705882353,
"calib/mu_w": 0.4300871349483204,
"calib/nonempty_final_conf_rate": 0.9375,
"calib/nonempty_reasoning_rate": 0.97265625,
"calib/nonempty_step_conf_rate": 0.9140625,
"calib/pce": 0.22520263749999997,
"calib/std_conf": 0.3029079213144214,
"calib/step_conf_rate": 0.9140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1359.0,
"completions/max_terminated_length": 1359.0,
"completions/mean_length": 420.1640625,
"completions/mean_terminated_length": 421.8117980957031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.0059139239601790905,
"learning_rate": 4.25e-06,
"loss": 0.0216,
"num_tokens": 9935612.0,
"reward": 1.0119876861572266,
"reward_std": 0.45886707305908203,
"rewards/accuracy_reward_step": 0.2734375,
"rewards/final_brier_reward_step": 0.5972976684570312,
"rewards/format_reward_step": 0.875,
"rewards/stepwise_brier_reward": 0.7096055746078491,
"step": 47
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.46787658802177856,
"calib/avg_num_step_conf": 3.1640625,
"calib/ece": 0.31055860215053765,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.91796875,
"calib/frac_conf_gt_0.9": 0.06854838709677419,
"calib/gap": -0.032152534785238995,
"calib/mean_conf": 0.4296674731182796,
"calib/mu_c": 0.40503448275862075,
"calib/mu_w": 0.43718701754385975,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.95703125,
"calib/pce": 0.25317755376344087,
"calib/std_conf": 0.29134742010503306,
"calib/step_conf_rate": 0.95703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1337.0,
"completions/max_terminated_length": 1337.0,
"completions/mean_length": 400.90625,
"completions/mean_terminated_length": 402.47845458984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.0512,
"grad_norm": 0.00578545406460762,
"learning_rate": 4.222222222222223e-06,
"loss": 0.0263,
"num_tokens": 10091500.0,
"reward": 1.0206351280212402,
"reward_std": 0.38921114802360535,
"rewards/accuracy_reward_step": 0.23828125,
"rewards/final_brier_reward_step": 0.6308324933052063,
"rewards/format_reward_step": 0.91796875,
"rewards/stepwise_brier_reward": 0.7661881446838379,
"step": 48
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5342419208698279,
"calib/avg_num_step_conf": 3.265625,
"calib/ece": 0.2022221817092933,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.040160642570281124,
"calib/gap": 0.017430107211739654,
"calib/mean_conf": 0.36531401571197075,
"calib/mu_c": 0.3773540897698792,
"calib/mu_w": 0.35992398255813957,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.12914962481504966,
"calib/std_conf": 0.2741067596898534,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2104.0,
"completions/max_terminated_length": 2104.0,
"completions/mean_length": 398.07421875,
"completions/mean_terminated_length": 398.07421875,
"completions/min_length": 60.0,
"completions/min_terminated_length": 60.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.0057755582965910435,
"learning_rate": 4.194444444444445e-06,
"loss": -0.0006,
"num_tokens": 10247511.0,
"reward": 1.1383147239685059,
"reward_std": 0.3906211256980896,
"rewards/accuracy_reward_step": 0.3046875,
"rewards/final_brier_reward_step": 0.6833595037460327,
"rewards/format_reward_step": 0.953125,
"rewards/stepwise_brier_reward": 0.8240399956703186,
"step": 49
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5059739866908651,
"calib/avg_num_step_conf": 2.75390625,
"calib/ece": 0.22803238084848487,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 0.02,
"calib/gap": 0.0008633480723753695,
"calib/mean_conf": 0.3828160555151515,
"calib/mu_c": 0.38341694577352475,
"calib/mu_w": 0.3825535977011494,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.15342421818181817,
"calib/std_conf": 0.2580277267496595,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2193.0,
"completions/max_terminated_length": 2193.0,
"completions/mean_length": 458.51953125,
"completions/mean_terminated_length": 460.31768798828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 50.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.005000683479011059,
"learning_rate": 4.166666666666667e-06,
"loss": -0.0097,
"num_tokens": 10419820.0,
"reward": 1.1152260303497314,
"reward_std": 0.3721734881401062,
"rewards/accuracy_reward_step": 0.30078125,
"rewards/final_brier_reward_step": 0.6713389158248901,
"rewards/format_reward_step": 0.9375,
"rewards/stepwise_brier_reward": 0.7901011109352112,
"step": 50
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.48169112508735146,
"calib/avg_num_step_conf": 2.8515625,
"calib/ece": 0.2317144578313253,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.028112449799196786,
"calib/gap": -0.024234863731656175,
"calib/mean_conf": 0.349529718875502,
"calib/mu_c": 0.3340544444444445,
"calib/mu_w": 0.35828930817610066,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.1098991967871486,
"calib/std_conf": 0.24695792887627788,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1808.0,
"completions/max_terminated_length": 1808.0,
"completions/mean_length": 421.43359375,
"completions/mean_terminated_length": 423.0863037109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 53.0,
"epoch": 0.0544,
"grad_norm": 0.005766298621892929,
"learning_rate": 4.138888888888889e-06,
"loss": 0.0076,
"num_tokens": 10586571.0,
"reward": 1.1778815984725952,
"reward_std": 0.35190486907958984,
"rewards/accuracy_reward_step": 0.359375,
"rewards/final_brier_reward_step": 0.6635822653770447,
"rewards/format_reward_step": 0.94921875,
"rewards/stepwise_brier_reward": 0.807799220085144,
"step": 51
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.5847146739130435,
"calib/avg_num_step_conf": 2.4140625,
"calib/ece": 0.14099603174603176,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.003968253968253968,
"calib/gap": 0.0635904891304348,
"calib/mean_conf": 0.29614682539682535,
"calib/mu_c": 0.3365217391304348,
"calib/mu_w": 0.27293125,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.036031746031746026,
"calib/std_conf": 0.21578038114422565,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1771.0,
"completions/max_terminated_length": 1771.0,
"completions/mean_length": 463.85546875,
"completions/mean_terminated_length": 463.85546875,
"completions/min_length": 57.0,
"completions/min_terminated_length": 57.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.00548426853492856,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0218,
"num_tokens": 10762838.0,
"reward": 1.2175707817077637,
"reward_std": 0.39718616008758545,
"rewards/accuracy_reward_step": 0.359375,
"rewards/final_brier_reward_step": 0.7146013379096985,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.8504550457000732,
"step": 52
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5517492711370263,
"calib/avg_num_step_conf": 2.8359375,
"calib/ece": 0.19429305555555557,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.007936507936507936,
"calib/gap": 0.05148232838589978,
"calib/mean_conf": 0.32177837301587303,
"calib/mu_c": 0.35323979591836735,
"calib/mu_w": 0.30175746753246757,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.06359126984126984,
"calib/std_conf": 0.20863787849712598,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1424.0,
"completions/max_terminated_length": 1424.0,
"completions/mean_length": 467.796875,
"completions/mean_terminated_length": 469.63140869140625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 54.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.005103487987071276,
"learning_rate": 4.083333333333334e-06,
"loss": -0.0497,
"num_tokens": 10937986.0,
"reward": 1.2299796342849731,
"reward_std": 0.37857282161712646,
"rewards/accuracy_reward_step": 0.38671875,
"rewards/final_brier_reward_step": 0.7058714628219604,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.8128631711006165,
"step": 53
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.519959451901566,
"calib/avg_num_step_conf": 2.625,
"calib/ece": 0.20242142857142856,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.9453125,
"calib/frac_conf_gt_0.9": 0.004081632653061225,
"calib/gap": 0.010882672678970917,
"calib/mean_conf": 0.2956602040816327,
"calib/mu_c": 0.30227864583333336,
"calib/mu_w": 0.29139597315436244,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.05312244897959184,
"calib/std_conf": 0.20148572192781972,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1210.0,
"completions/max_terminated_length": 1210.0,
"completions/mean_length": 422.3046875,
"completions/mean_terminated_length": 423.9608154296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 37.0,
"epoch": 0.0576,
"grad_norm": 0.005721841938793659,
"learning_rate": 4.055555555555556e-06,
"loss": -0.0556,
"num_tokens": 11101896.0,
"reward": 1.2086176872253418,
"reward_std": 0.3556395173072815,
"rewards/accuracy_reward_step": 0.3828125,
"rewards/final_brier_reward_step": 0.6776452660560608,
"rewards/format_reward_step": 0.9453125,
"rewards/stepwise_brier_reward": 0.8135550022125244,
"step": 54
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.6061990335246148,
"calib/avg_num_step_conf": 2.640625,
"calib/ece": 0.12081600475234272,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.004016064257028112,
"calib/gap": 0.06854713523985712,
"calib/mean_conf": 0.2913125093038822,
"calib/mu_c": 0.3386623376623377,
"calib/mu_w": 0.2701152024224806,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.96875,
"calib/pce": 0.05144578313253013,
"calib/std_conf": 0.19960611928322558,
"calib/step_conf_rate": 0.96875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2552.0,
"completions/max_terminated_length": 2552.0,
"completions/mean_length": 460.8828125,
"completions/mean_terminated_length": 460.8828125,
"completions/min_length": 35.0,
"completions/min_terminated_length": 35.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.005817664321511984,
"learning_rate": 4.027777777777779e-06,
"loss": -0.0206,
"num_tokens": 11277274.0,
"reward": 1.162182331085205,
"reward_std": 0.383277028799057,
"rewards/accuracy_reward_step": 0.3046875,
"rewards/final_brier_reward_step": 0.7293254137039185,
"rewards/format_reward_step": 0.94140625,
"rewards/stepwise_brier_reward": 0.8416409492492676,
"step": 55
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4731471135940409,
"calib/avg_num_step_conf": 2.30078125,
"calib/ece": 0.17838267716535436,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.003937007874015748,
"calib/gap": -0.02074724022346361,
"calib/mean_conf": 0.25350708661417326,
"calib/mu_c": 0.23888600000000007,
"calib/mu_w": 0.2596332402234637,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.06830708661417324,
"calib/std_conf": 0.19904568830411037,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1547.0,
"completions/max_terminated_length": 1547.0,
"completions/mean_length": 452.2421875,
"completions/mean_terminated_length": 454.0157165527344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 80.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.0054603926837444305,
"learning_rate": 4.000000000000001e-06,
"loss": -0.0393,
"num_tokens": 11449456.0,
"reward": 1.1504205465316772,
"reward_std": 0.33952397108078003,
"rewards/accuracy_reward_step": 0.296875,
"rewards/final_brier_reward_step": 0.7038324475288391,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.8580799102783203,
"step": 56
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5687116564417178,
"calib/avg_num_step_conf": 2.39453125,
"calib/ece": 0.16990777338603427,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 0.003952569169960474,
"calib/gap": 0.04262740286298575,
"calib/mean_conf": 0.25973649538866933,
"calib/mu_c": 0.2872,
"calib/mu_w": 0.24457259713701426,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.03695652173913043,
"calib/std_conf": 0.19891979247216385,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1169.0,
"completions/max_terminated_length": 1169.0,
"completions/mean_length": 433.8671875,
"completions/mean_terminated_length": 435.56866455078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.0608,
"grad_norm": 0.005786188878118992,
"learning_rate": 3.972222222222223e-06,
"loss": -0.0085,
"num_tokens": 11616886.0,
"reward": 1.2042686939239502,
"reward_std": 0.37658119201660156,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.7032498717308044,
"rewards/format_reward_step": 0.953125,
"rewards/stepwise_brier_reward": 0.8449498414993286,
"step": 57
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.557451306054658,
"calib/avg_num_step_conf": 2.296875,
"calib/ece": 0.15416205533596838,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.038548203231164146,
"calib/mean_conf": 0.2767470355731226,
"calib/mu_c": 0.3040202702702703,
"calib/mu_w": 0.26547206703910614,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.0692094861660079,
"calib/std_conf": 0.20202418661481644,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2153.0,
"completions/max_terminated_length": 2153.0,
"completions/mean_length": 492.640625,
"completions/mean_terminated_length": 492.640625,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.005370710510760546,
"learning_rate": 3.944444444444445e-06,
"loss": 0.0023,
"num_tokens": 11798890.0,
"reward": 1.1722075939178467,
"reward_std": 0.3359895348548889,
"rewards/accuracy_reward_step": 0.2890625,
"rewards/final_brier_reward_step": 0.7481820583343506,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.864341139793396,
"step": 58
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5563291139240506,
"calib/avg_num_step_conf": 2.15625,
"calib/ece": 0.17481159420289852,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.007905138339920948,
"calib/gap": 0.03129324894514768,
"calib/mean_conf": 0.26945718050065876,
"calib/mu_c": 0.28900000000000003,
"calib/mu_w": 0.25770675105485236,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.03438735177865613,
"calib/std_conf": 0.19475026301124918,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2568.0,
"completions/max_terminated_length": 2568.0,
"completions/mean_length": 495.70703125,
"completions/mean_terminated_length": 497.6510009765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 55.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.005067238584160805,
"learning_rate": 3.916666666666667e-06,
"loss": 0.0049,
"num_tokens": 11981607.0,
"reward": 1.2288419008255005,
"reward_std": 0.3457253873348236,
"rewards/accuracy_reward_step": 0.37109375,
"rewards/final_brier_reward_step": 0.7050671577453613,
"rewards/format_reward_step": 0.96875,
"rewards/stepwise_brier_reward": 0.8583583831787109,
"step": 59
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5622261174408414,
"calib/avg_num_step_conf": 2.0078125,
"calib/ece": 0.1927982283464567,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.039012691296433616,
"calib/mean_conf": 0.2438159448818898,
"calib/mu_c": 0.2688516483516484,
"calib/mu_w": 0.22983895705521476,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.039173228346456694,
"calib/std_conf": 0.183887057063849,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2320.0,
"completions/max_terminated_length": 2320.0,
"completions/mean_length": 488.7265625,
"completions/mean_terminated_length": 488.7265625,
"completions/min_length": 67.0,
"completions/min_terminated_length": 67.0,
"epoch": 0.064,
"grad_norm": 0.0051851640455424786,
"learning_rate": 3.88888888888889e-06,
"loss": 0.0367,
"num_tokens": 12165145.0,
"reward": 1.229191780090332,
"reward_std": 0.33426523208618164,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.7230992317199707,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8768181800842285,
"step": 60
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5192792338709677,
"calib/avg_num_step_conf": 1.9140625,
"calib/ece": 0.22458095238095233,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.013787373991935525,
"calib/mean_conf": 0.2974428571428572,
"calib/mu_c": 0.30444596774193555,
"calib/mu_w": 0.29065859375,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.014980158730158727,
"calib/std_conf": 0.19797366304259614,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1870.0,
"completions/max_terminated_length": 1870.0,
"completions/mean_length": 472.359375,
"completions/mean_terminated_length": 474.2117919921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.00622786907479167,
"learning_rate": 3.861111111111112e-06,
"loss": 0.0099,
"num_tokens": 12339701.0,
"reward": 1.3256220817565918,
"reward_std": 0.33889907598495483,
"rewards/accuracy_reward_step": 0.484375,
"rewards/final_brier_reward_step": 0.6663312911987305,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.8557630777359009,
"step": 61
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5501989150090416,
"calib/avg_num_step_conf": 1.69140625,
"calib/ece": 0.15170236220472444,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02735201446654595,
"calib/mean_conf": 0.26299055118110237,
"calib/mu_c": 0.2818354430379746,
"calib/mu_w": 0.25448342857142864,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.05183464566929135,
"calib/std_conf": 0.1917988954313152,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2374.0,
"completions/max_terminated_length": 2374.0,
"completions/mean_length": 524.0390625,
"completions/mean_terminated_length": 524.0390625,
"completions/min_length": 81.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.004966284614056349,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0214,
"num_tokens": 12530503.0,
"reward": 1.198150873184204,
"reward_std": 0.3018414378166199,
"rewards/accuracy_reward_step": 0.30859375,
"rewards/final_brier_reward_step": 0.7484416961669922,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8800951242446899,
"step": 62
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5499891563652135,
"calib/avg_num_step_conf": 1.48046875,
"calib/ece": 0.16085365853658537,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02294946866189551,
"calib/mean_conf": 0.24890243902439027,
"calib/mu_c": 0.2637356321839081,
"calib/mu_w": 0.2407861635220126,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.9609375,
"calib/pce": 0.028048780487804886,
"calib/std_conf": 0.17309360758249012,
"calib/step_conf_rate": 0.9609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2724.0,
"completions/max_terminated_length": 2724.0,
"completions/mean_length": 542.71484375,
"completions/mean_terminated_length": 546.9881591796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 136.0,
"epoch": 0.0672,
"grad_norm": 0.005206855479627848,
"learning_rate": 3.8055555555555556e-06,
"loss": 0.0564,
"num_tokens": 12727646.0,
"reward": 1.1937217712402344,
"reward_std": 0.34111452102661133,
"rewards/accuracy_reward_step": 0.34765625,
"rewards/final_brier_reward_step": 0.6999675631523132,
"rewards/format_reward_step": 0.94140625,
"rewards/stepwise_brier_reward": 0.8546397686004639,
"step": 63
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.48933708567854906,
"calib/avg_num_step_conf": 1.62890625,
"calib/ece": 0.2630434782608695,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0034543464665415524,
"calib/mean_conf": 0.2290513833992095,
"calib/mu_c": 0.22727642276422766,
"calib/mu_w": 0.2307307692307692,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0029644268774703555,
"calib/std_conf": 0.1556412507684706,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1450.0,
"completions/max_terminated_length": 1450.0,
"completions/mean_length": 488.3828125,
"completions/mean_terminated_length": 490.2980651855469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 150.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.0052140094339847565,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0193,
"num_tokens": 12906016.0,
"reward": 1.3204822540283203,
"reward_std": 0.3196646571159363,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.6467587947845459,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8852865695953369,
"step": 64
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.517916032470827,
"calib/avg_num_step_conf": 1.65625,
"calib/ece": 0.20532690157480313,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00885664408929479,
"calib/mean_conf": 0.23859435826771655,
"calib/mu_c": 0.24368518518518517,
"calib/mu_w": 0.23482854109589038,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.009362204724409452,
"calib/std_conf": 0.15915199787491316,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1409.0,
"completions/max_terminated_length": 1409.0,
"completions/mean_length": 445.8984375,
"completions/mean_terminated_length": 447.6470947265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.0053698234260082245,
"learning_rate": 3.7500000000000005e-06,
"loss": -0.0045,
"num_tokens": 13074758.0,
"reward": 1.2875516414642334,
"reward_std": 0.2792130708694458,
"rewards/accuracy_reward_step": 0.421875,
"rewards/final_brier_reward_step": 0.6870719194412231,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9073121547698975,
"step": 65
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.6001217285453438,
"calib/avg_num_step_conf": 1.6015625,
"calib/ece": 0.17750875,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.053684851896936475,
"calib/mean_conf": 0.2036023611111111,
"calib/mu_c": 0.23747494623655913,
"calib/mu_w": 0.18379009433962265,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.006031746031746031,
"calib/std_conf": 0.15583048856736414,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2930.0,
"completions/max_terminated_length": 2930.0,
"completions/mean_length": 541.546875,
"completions/mean_terminated_length": 543.6705932617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.0704,
"grad_norm": 0.0056963106617331505,
"learning_rate": 3.7222222222222225e-06,
"loss": 0.0118,
"num_tokens": 13269314.0,
"reward": 1.223191261291504,
"reward_std": 0.3150969445705414,
"rewards/accuracy_reward_step": 0.36328125,
"rewards/final_brier_reward_step": 0.7096078395843506,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.8672992587089539,
"step": 66
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5755578755056767,
"calib/avg_num_step_conf": 1.65234375,
"calib/ece": 0.21422980392156862,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03657393971029624,
"calib/mean_conf": 0.19622509803921567,
"calib/mu_c": 0.21888659793814433,
"calib/mu_w": 0.1823126582278481,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.015031372549019605,
"calib/std_conf": 0.1574204142450045,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1028.0,
"completions/max_terminated_length": 1028.0,
"completions/mean_length": 495.73828125,
"completions/mean_terminated_length": 497.682373046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.004854717291891575,
"learning_rate": 3.694444444444445e-06,
"loss": -0.0018,
"num_tokens": 13450799.0,
"reward": 1.2532745599746704,
"reward_std": 0.24554133415222168,
"rewards/accuracy_reward_step": 0.37890625,
"rewards/final_brier_reward_step": 0.7047981023788452,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9113144874572754,
"step": 67
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5240679565832941,
"calib/avg_num_step_conf": 1.76953125,
"calib/ece": 0.20631909448818894,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0011602204543922356,
"calib/mean_conf": 0.21667303149606304,
"calib/mu_c": 0.21741758241758247,
"calib/mu_w": 0.21625736196319023,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.032362204724409455,
"calib/std_conf": 0.15226898440999392,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2408.0,
"completions/max_terminated_length": 2408.0,
"completions/mean_length": 479.2890625,
"completions/mean_terminated_length": 479.2890625,
"completions/min_length": 148.0,
"completions/min_terminated_length": 148.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.005828273016959429,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0098,
"num_tokens": 13627153.0,
"reward": 1.2230072021484375,
"reward_std": 0.28815874457359314,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.7085984945297241,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8810821771621704,
"step": 68
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5680162510507145,
"calib/avg_num_step_conf": 1.4609375,
"calib/ece": 0.14117647058823532,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.04762293359484454,
"calib/mean_conf": 0.19390196078431376,
"calib/mu_c": 0.22602409638554222,
"calib/mu_w": 0.17840116279069768,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0047941176470588254,
"calib/std_conf": 0.14267239381513033,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 1916.0,
"completions/max_terminated_length": 1916.0,
"completions/mean_length": 553.62890625,
"completions/mean_terminated_length": 553.62890625,
"completions/min_length": 207.0,
"completions/min_terminated_length": 207.0,
"epoch": 0.0736,
"grad_norm": 0.005274781957268715,
"learning_rate": 3.638888888888889e-06,
"loss": 0.0347,
"num_tokens": 13822946.0,
"reward": 1.2179726362228394,
"reward_std": 0.24709612131118774,
"rewards/accuracy_reward_step": 0.32421875,
"rewards/final_brier_reward_step": 0.7536987662315369,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8863679766654968,
"step": 69
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5386532030686513,
"calib/avg_num_step_conf": 1.57421875,
"calib/ece": 0.22510019841269846,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01616616943151275,
"calib/mean_conf": 0.18362996031746034,
"calib/mu_c": 0.19331683168316838,
"calib/mu_w": 0.17715066225165563,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.00396825396825397,
"calib/std_conf": 0.13254462121225444,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2252.0,
"completions/max_terminated_length": 2252.0,
"completions/mean_length": 509.3359375,
"completions/mean_terminated_length": 511.3333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.005372477695345879,
"learning_rate": 3.6111111111111115e-06,
"loss": -0.0503,
"num_tokens": 14009896.0,
"reward": 1.246382236480713,
"reward_std": 0.310563325881958,
"rewards/accuracy_reward_step": 0.39453125,
"rewards/final_brier_reward_step": 0.6737847328186035,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.9020219445228577,
"step": 70
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5616416480357713,
"calib/avg_num_step_conf": 1.86328125,
"calib/ece": 0.21790703125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.028257361865218822,
"calib/mean_conf": 0.18287421875000004,
"calib/mu_c": 0.19998316831683172,
"calib/mu_w": 0.1717258064516129,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.003125,
"calib/std_conf": 0.1332470140648643,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1373.0,
"completions/max_terminated_length": 1373.0,
"completions/mean_length": 522.53125,
"completions/mean_terminated_length": 524.5804443359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 206.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.005132544785737991,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.0063,
"num_tokens": 14197640.0,
"reward": 1.2799885272979736,
"reward_std": 0.2574692368507385,
"rewards/accuracy_reward_step": 0.39453125,
"rewards/final_brier_reward_step": 0.7082030177116394,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9301106929779053,
"step": 71
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5610800242702083,
"calib/avg_num_step_conf": 1.93359375,
"calib/ece": 0.1971456692913386,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02938009842917816,
"calib/mean_conf": 0.18120078740157483,
"calib/mu_c": 0.20005494505494503,
"calib/mu_w": 0.17067484662576687,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.010039370078740154,
"calib/std_conf": 0.14263077656865242,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1251.0,
"completions/max_terminated_length": 1251.0,
"completions/mean_length": 484.7734375,
"completions/mean_terminated_length": 486.6745300292969,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.0768,
"grad_norm": 0.005084159318357706,
"learning_rate": 3.555555555555556e-06,
"loss": -0.0099,
"num_tokens": 14375718.0,
"reward": 1.24046790599823,
"reward_std": 0.282471239566803,
"rewards/accuracy_reward_step": 0.35546875,
"rewards/final_brier_reward_step": 0.7184491157531738,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9218480587005615,
"step": 72
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5334898665348492,
"calib/avg_num_step_conf": 1.72265625,
"calib/ece": 0.30389555686274505,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0090898781512605,
"calib/mean_conf": 0.17375150196078434,
"calib/mu_c": 0.17859943697478994,
"calib/mu_w": 0.16950955882352944,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.005490196078431373,
"calib/std_conf": 0.12611939174361494,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1068.0,
"completions/max_terminated_length": 1068.0,
"completions/mean_length": 494.73828125,
"completions/mean_terminated_length": 496.678466796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 182.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.0048870607279241085,
"learning_rate": 3.5277777777777784e-06,
"loss": -0.0065,
"num_tokens": 14558971.0,
"reward": 1.3024544715881348,
"reward_std": 0.2780904769897461,
"rewards/accuracy_reward_step": 0.46484375,
"rewards/final_brier_reward_step": 0.6398916244506836,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8894093632698059,
"step": 73
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5445133853876185,
"calib/avg_num_step_conf": 2.12109375,
"calib/ece": 0.19820119521912347,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.021587702175125484,
"calib/mean_conf": 0.16195816733067728,
"calib/mu_c": 0.17597727272727273,
"calib/mu_w": 0.15438957055214725,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.004780876494023905,
"calib/std_conf": 0.12229669381367947,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2595.0,
"completions/max_terminated_length": 2595.0,
"completions/mean_length": 560.17578125,
"completions/mean_terminated_length": 560.17578125,
"completions/min_length": 125.0,
"completions/min_terminated_length": 125.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.005485473200678825,
"learning_rate": 3.5e-06,
"loss": 0.037,
"num_tokens": 14755872.0,
"reward": 1.210759162902832,
"reward_std": 0.3039107322692871,
"rewards/accuracy_reward_step": 0.34765625,
"rewards/final_brier_reward_step": 0.7089241743087769,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.8767507076263428,
"step": 74
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5822851153039832,
"calib/avg_num_step_conf": 2.14453125,
"calib/ece": 0.4596764705882353,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02781348270440248,
"calib/mean_conf": 0.16385294117647062,
"calib/mu_c": 0.17432389937106918,
"calib/mu_w": 0.1465104166666667,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.11003423237916028,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1353.0,
"completions/max_terminated_length": 1353.0,
"completions/mean_length": 514.09765625,
"completions/mean_terminated_length": 516.11376953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.08,
"grad_norm": 0.005142589565366507,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.0084,
"num_tokens": 14941801.0,
"reward": 1.414496660232544,
"reward_std": 0.24992991983890533,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.5492589473724365,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8844687342643738,
"step": 75
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5394428694687539,
"calib/avg_num_step_conf": 2.42578125,
"calib/ece": 0.37201381740196077,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.022696512830488097,
"calib/mean_conf": 0.14955481004901963,
"calib/mu_c": 0.16041353383458648,
"calib/mu_w": 0.13771702100409838,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.11541540821168365,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2557.0,
"completions/max_terminated_length": 2557.0,
"completions/mean_length": 562.54296875,
"completions/mean_terminated_length": 562.54296875,
"completions/min_length": 95.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.006148222368210554,
"learning_rate": 3.444444444444445e-06,
"loss": 0.0345,
"num_tokens": 15138436.0,
"reward": 1.3471288681030273,
"reward_std": 0.2481614351272583,
"rewards/accuracy_reward_step": 0.51953125,
"rewards/final_brier_reward_step": 0.6013270616531372,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9217990636825562,
"step": 76
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.45207688338493285,
"calib/avg_num_step_conf": 2.203125,
"calib/ece": 0.45151673228346456,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.023998690660474747,
"calib/mean_conf": 0.16383759842519685,
"calib/mu_c": 0.15420032894736843,
"calib/mu_w": 0.17819901960784318,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.008464566929133856,
"calib/std_conf": 0.12103179437546255,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2595.0,
"completions/max_terminated_length": 2595.0,
"completions/mean_length": 591.92578125,
"completions/mean_terminated_length": 591.92578125,
"completions/min_length": 186.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.005385115742683411,
"learning_rate": 3.416666666666667e-06,
"loss": 0.0317,
"num_tokens": 15344201.0,
"reward": 1.381176233291626,
"reward_std": 0.262881875038147,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.5356004238128662,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.897254467010498,
"step": 77
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5417076771653543,
"calib/avg_num_step_conf": 2.65625,
"calib/ece": 0.34595058823529407,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.019628457185039305,
"calib/mean_conf": 0.1560101960784314,
"calib/mu_c": 0.16578593750000004,
"calib/mu_w": 0.14615748031496073,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.11908328332149824,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2025.0,
"completions/max_terminated_length": 2025.0,
"completions/mean_length": 632.24609375,
"completions/mean_terminated_length": 634.7255249023438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 133.0,
"epoch": 0.0832,
"grad_norm": 0.004347877111285925,
"learning_rate": 3.3888888888888893e-06,
"loss": -0.0022,
"num_tokens": 15563648.0,
"reward": 1.344900369644165,
"reward_std": 0.2374875694513321,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.6235101222991943,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9372687339782715,
"step": 78
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5324177579048228,
"calib/avg_num_step_conf": 2.2578125,
"calib/ece": 0.4283722656249999,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01839741296710315,
"calib/mean_conf": 0.17709648437500003,
"calib/mu_c": 0.18435483870967745,
"calib/mu_w": 0.1659574257425743,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.12327377126928858,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1236.0,
"completions/max_terminated_length": 1236.0,
"completions/mean_length": 596.4453125,
"completions/mean_terminated_length": 598.7843627929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.004410876892507076,
"learning_rate": 3.3611111111111117e-06,
"loss": 0.014,
"num_tokens": 15772282.0,
"reward": 1.4093378782272339,
"reward_std": 0.23596200346946716,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.5636845827102661,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8974823951721191,
"step": 79
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5459320091673032,
"calib/avg_num_step_conf": 2.40625,
"calib/ece": 0.430138671875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.015111471861471854,
"calib/mean_conf": 0.171423828125,
"calib/mu_c": 0.1774448051948052,
"calib/mu_w": 0.16233333333333336,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0,
"calib/std_conf": 0.12099904384708153,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1627.0,
"completions/max_terminated_length": 1627.0,
"completions/mean_length": 570.23828125,
"completions/mean_terminated_length": 572.4745483398438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 79.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.004657160025089979,
"learning_rate": 3.3333333333333333e-06,
"loss": -0.0026,
"num_tokens": 15969991.0,
"reward": 1.3999781608581543,
"reward_std": 0.2511942982673645,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.561883270740509,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8839585185050964,
"step": 80
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5228434608166699,
"calib/avg_num_step_conf": 2.41015625,
"calib/ece": 0.41380065616797895,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010194984792596895,
"calib/mean_conf": 0.19328595800524934,
"calib/mu_c": 0.19733986928104577,
"calib/mu_w": 0.18714488448844888,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.002362204724409449,
"calib/std_conf": 0.12704253680914465,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3021.0,
"completions/max_terminated_length": 3021.0,
"completions/mean_length": 639.5703125,
"completions/mean_terminated_length": 642.0784912109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 233.0,
"epoch": 0.0864,
"grad_norm": 0.004149497952312231,
"learning_rate": 3.3055555555555558e-06,
"loss": 0.022,
"num_tokens": 16189537.0,
"reward": 1.4040427207946777,
"reward_std": 0.2714318037033081,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.5699639320373535,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9043678045272827,
"step": 81
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.47222570532915353,
"calib/avg_num_step_conf": 2.53125,
"calib/ece": 0.37285294117647066,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.017988244514106605,
"calib/mean_conf": 0.22479411764705884,
"calib/mu_c": 0.2170344827586207,
"calib/mu_w": 0.2350227272727273,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.014509803921568625,
"calib/std_conf": 0.1279360355063584,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1886.0,
"completions/max_terminated_length": 1886.0,
"completions/mean_length": 587.3046875,
"completions/mean_terminated_length": 589.6078491210938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 236.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.004640071652829647,
"learning_rate": 3.277777777777778e-06,
"loss": 0.0542,
"num_tokens": 16395007.0,
"reward": 1.3727104663848877,
"reward_std": 0.26982933282852173,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.5919941067695618,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8693534731864929,
"step": 82
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6083211143695014,
"calib/avg_num_step_conf": 2.296875,
"calib/ece": 0.30937421875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.045839247311827896,
"calib/mean_conf": 0.21093828125000003,
"calib/mu_c": 0.23314166666666666,
"calib/mu_w": 0.18730241935483877,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.00234375,
"calib/std_conf": 0.13373396902823867,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1952.0,
"completions/max_terminated_length": 1952.0,
"completions/mean_length": 671.09765625,
"completions/mean_terminated_length": 673.7294311523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 251.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.004066772758960724,
"learning_rate": 3.2500000000000002e-06,
"loss": 0.016,
"num_tokens": 16623640.0,
"reward": 1.378986120223999,
"reward_std": 0.2268155962228775,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.6610163450241089,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.936099648475647,
"step": 83
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5661490683229813,
"calib/avg_num_step_conf": 2.19921875,
"calib/ece": 0.32683333333333336,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03280822981366452,
"calib/mean_conf": 0.2221862745098039,
"calib/mu_c": 0.2369821428571428,
"calib/mu_w": 0.2041739130434783,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.13624174595557223,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1615.0,
"completions/max_terminated_length": 1615.0,
"completions/mean_length": 593.65234375,
"completions/mean_terminated_length": 595.9804077148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.0896,
"grad_norm": 0.004806416109204292,
"learning_rate": 3.2222222222222227e-06,
"loss": 0.0066,
"num_tokens": 16831103.0,
"reward": 1.3885453939437866,
"reward_std": 0.2803252935409546,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.6362625360488892,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9082188010215759,
"step": 84
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5601453855878634,
"calib/avg_num_step_conf": 2.36328125,
"calib/ece": 0.21920688405797098,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.020185141698272208,
"calib/mean_conf": 0.24956781949934123,
"calib/mu_c": 0.26073746312684365,
"calib/mu_w": 0.24055232142857144,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.01106719367588933,
"calib/std_conf": 0.14309457774759896,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2811.0,
"completions/max_terminated_length": 2811.0,
"completions/mean_length": 624.90234375,
"completions/mean_terminated_length": 627.3529663085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.0047371708787977695,
"learning_rate": 3.1944444444444443e-06,
"loss": 0.0222,
"num_tokens": 17048470.0,
"reward": 1.3084673881530762,
"reward_std": 0.2495461404323578,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.6849251985549927,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.9155815839767456,
"step": 85
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5725414433503677,
"calib/avg_num_step_conf": 2.2109375,
"calib/ece": 0.21341450980392157,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.028003184594291325,
"calib/mean_conf": 0.2587423529411765,
"calib/mu_c": 0.2743362831858407,
"calib/mu_w": 0.24633309859154937,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.014509803921568629,
"calib/std_conf": 0.13502763061595127,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1672.0,
"completions/max_terminated_length": 1672.0,
"completions/mean_length": 624.00390625,
"completions/mean_terminated_length": 626.4509887695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 160.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.005471901968121529,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0116,
"num_tokens": 17263295.0,
"reward": 1.313936471939087,
"reward_std": 0.2574613690376282,
"rewards/accuracy_reward_step": 0.44140625,
"rewards/final_brier_reward_step": 0.6988636255264282,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9158311486244202,
"step": 86
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5514144125542021,
"calib/avg_num_step_conf": 1.90625,
"calib/ece": 0.39824015748031494,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.026239039163053146,
"calib/mean_conf": 0.259240157480315,
"calib/mu_c": 0.26822754491017964,
"calib/mu_w": 0.2419885057471265,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.0,
"calib/std_conf": 0.12253291941406065,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2744.0,
"completions/max_terminated_length": 2744.0,
"completions/mean_length": 540.47265625,
"completions/mean_terminated_length": 540.47265625,
"completions/min_length": 190.0,
"completions/min_terminated_length": 190.0,
"epoch": 0.0928,
"grad_norm": 0.004846483934670687,
"learning_rate": 3.138888888888889e-06,
"loss": 0.0196,
"num_tokens": 17456720.0,
"reward": 1.4581712484359741,
"reward_std": 0.25667858123779297,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.5915886163711548,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.8729454874992371,
"step": 87
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5230451366815003,
"calib/avg_num_step_conf": 2.203125,
"calib/ece": 0.3017470355731225,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.019502797202797184,
"calib/mean_conf": 0.26663241106719365,
"calib/mu_c": 0.2751118881118881,
"calib/mu_w": 0.2556090909090909,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.0015810276679841919,
"calib/std_conf": 0.14371236663911982,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2261.0,
"completions/max_terminated_length": 2261.0,
"completions/mean_length": 654.75390625,
"completions/mean_terminated_length": 654.75390625,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.0045557511039078236,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.0455,
"num_tokens": 17683753.0,
"reward": 1.395997166633606,
"reward_std": 0.2871512472629547,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.6374142169952393,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8872852921485901,
"step": 88
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.555745341614907,
"calib/avg_num_step_conf": 2.046875,
"calib/ece": 0.19541455384313725,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.025722160959627316,
"calib/mean_conf": 0.2751736814509804,
"calib/mu_c": 0.289295652173913,
"calib/mu_w": 0.2635734912142857,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.009803921568627449,
"calib/std_conf": 0.1283167888660835,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1837.0,
"completions/max_terminated_length": 1837.0,
"completions/mean_length": 628.22265625,
"completions/mean_terminated_length": 630.6863403320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 237.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.004618597216904163,
"learning_rate": 3.0833333333333336e-06,
"loss": 0.0243,
"num_tokens": 17903034.0,
"reward": 1.3240776062011719,
"reward_std": 0.22102946043014526,
"rewards/accuracy_reward_step": 0.44921875,
"rewards/final_brier_reward_step": 0.709104061126709,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8952895402908325,
"step": 89
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.535424836601307,
"calib/avg_num_step_conf": 2.0234375,
"calib/ece": 0.3402944664031621,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010287254901960863,
"calib/mean_conf": 0.28757114624505936,
"calib/mu_c": 0.29163725490196085,
"calib/mu_w": 0.28135,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.011561264822134388,
"calib/std_conf": 0.13380067056685727,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2446.0,
"completions/max_terminated_length": 2446.0,
"completions/mean_length": 613.71875,
"completions/mean_terminated_length": 613.71875,
"completions/min_length": 90.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.096,
"grad_norm": 0.005011430941522121,
"learning_rate": 3.055555555555556e-06,
"loss": 0.0563,
"num_tokens": 18113034.0,
"reward": 1.4468196630477905,
"reward_std": 0.23396410048007965,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.6398017406463623,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9154877662658691,
"step": 90
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5235210504941112,
"calib/avg_num_step_conf": 2.03515625,
"calib/ece": 0.35872470588235295,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.021795234872072522,
"calib/mean_conf": 0.2922556862745099,
"calib/mu_c": 0.29986265060240963,
"calib/mu_w": 0.2780674157303371,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.12715683799174327,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1583.0,
"completions/max_terminated_length": 1583.0,
"completions/mean_length": 605.078125,
"completions/mean_terminated_length": 607.4509887695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 74.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.00484122522175312,
"learning_rate": 3.0277777777777776e-06,
"loss": -0.0012,
"num_tokens": 18325214.0,
"reward": 1.488823413848877,
"reward_std": 0.2453734278678894,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/final_brier_reward_step": 0.6353552341461182,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8955209255218506,
"step": 91
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5746240243670284,
"calib/avg_num_step_conf": 1.984375,
"calib/ece": 0.30334374999999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02283469763309859,
"calib/mean_conf": 0.303453125,
"calib/mu_c": 0.312640522875817,
"calib/mu_w": 0.2898058252427184,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0045703125,
"calib/std_conf": 0.11596806264111846,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1319.0,
"completions/max_terminated_length": 1319.0,
"completions/mean_length": 553.953125,
"completions/mean_terminated_length": 556.1255493164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 216.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.005605071783065796,
"learning_rate": 3e-06,
"loss": -0.0133,
"num_tokens": 18523314.0,
"reward": 1.4612683057785034,
"reward_std": 0.21096697449684143,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.6705144643783569,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9134193062782288,
"step": 92
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5141875,
"calib/avg_num_step_conf": 1.859375,
"calib/ece": 0.22534505928853754,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0009557249999999073,
"calib/mean_conf": 0.29639407114624505,
"calib/mu_c": 0.2968775999999999,
"calib/mu_w": 0.295921875,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.01383399209486166,
"calib/std_conf": 0.10620626886336024,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2470.0,
"completions/max_terminated_length": 2470.0,
"completions/mean_length": 606.7578125,
"completions/mean_terminated_length": 606.7578125,
"completions/min_length": 149.0,
"completions/min_terminated_length": 149.0,
"epoch": 0.0992,
"grad_norm": 0.004706274252384901,
"learning_rate": 2.9722222222222225e-06,
"loss": 0.0151,
"num_tokens": 18733988.0,
"reward": 1.3505432605743408,
"reward_std": 0.26323801279067993,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.6854676008224487,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9015504717826843,
"step": 93
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5609452736318409,
"calib/avg_num_step_conf": 1.8046875,
"calib/ece": 0.22904621513944226,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.023001575456053025,
"calib/mean_conf": 0.3112908366533865,
"calib/mu_c": 0.32201268656716414,
"calib/mu_w": 0.2990111111111111,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.0032362549800796815,
"calib/std_conf": 0.11176713392938932,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2890.0,
"completions/max_terminated_length": 2890.0,
"completions/mean_length": 613.76953125,
"completions/mean_terminated_length": 613.76953125,
"completions/min_length": 179.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.005544749088585377,
"learning_rate": 2.944444444444445e-06,
"loss": 0.0675,
"num_tokens": 18949361.0,
"reward": 1.3829312324523926,
"reward_std": 0.30212563276290894,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.6831309795379639,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8998380303382874,
"step": 94
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5020877518557795,
"calib/avg_num_step_conf": 1.8984375,
"calib/ece": 0.3291289062500001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.012245758218451752,
"calib/mean_conf": 0.32258984375,
"calib/mu_c": 0.3181890243902439,
"calib/mu_w": 0.33043478260869563,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.005546875,
"calib/std_conf": 0.102866416971724,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1538.0,
"completions/max_terminated_length": 1538.0,
"completions/mean_length": 585.6328125,
"completions/mean_terminated_length": 587.929443359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 165.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.005202965810894966,
"learning_rate": 2.916666666666667e-06,
"loss": 0.0122,
"num_tokens": 19154979.0,
"reward": 1.493459701538086,
"reward_std": 0.2513244152069092,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.6524089574813843,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9065208435058594,
"step": 95
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6400238473767886,
"calib/avg_num_step_conf": 1.78125,
"calib/ece": 0.40860606060606064,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.04988881822999475,
"calib/mean_conf": 0.3273623188405797,
"calib/mu_c": 0.34077117117117117,
"calib/mu_w": 0.2908823529411764,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.0023715415019762848,
"calib/std_conf": 0.11390679160950982,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2532.0,
"completions/max_terminated_length": 2532.0,
"completions/mean_length": 565.25,
"completions/mean_terminated_length": 565.25,
"completions/min_length": 191.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.1024,
"grad_norm": 0.006645775865763426,
"learning_rate": 2.888888888888889e-06,
"loss": 0.094,
"num_tokens": 19355067.0,
"reward": 1.5457134246826172,
"reward_std": 0.2587122321128845,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.6358581781387329,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8392626643180847,
"step": 96
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6319811320754717,
"calib/avg_num_step_conf": 1.75390625,
"calib/ece": 0.26446875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.047192452830188636,
"calib/mean_conf": 0.326859375,
"calib/mu_c": 0.3464,
"calib/mu_w": 0.29920754716981135,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.002695312500000002,
"calib/std_conf": 0.11486052917390453,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1385.0,
"completions/max_terminated_length": 1385.0,
"completions/mean_length": 554.2578125,
"completions/mean_terminated_length": 556.431396484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 204.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.006527770310640335,
"learning_rate": 2.861111111111111e-06,
"loss": 0.0134,
"num_tokens": 19551597.0,
"reward": 1.4724822044372559,
"reward_std": 0.24671298265457153,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.699970006942749,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.94623863697052,
"step": 97
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5448076923076923,
"calib/avg_num_step_conf": 1.5703125,
"calib/ece": 0.25709514566929137,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005448144230769225,
"calib/mean_conf": 0.3551292637795275,
"calib/mu_c": 0.35736,
"calib/mu_w": 0.3519118557692308,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.01083661417322835,
"calib/std_conf": 0.1257529834772536,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2692.0,
"completions/max_terminated_length": 2692.0,
"completions/mean_length": 591.34375,
"completions/mean_terminated_length": 593.6627807617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.005861423909664154,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0089,
"num_tokens": 19758733.0,
"reward": 1.4559632539749146,
"reward_std": 0.2572120726108551,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/final_brier_reward_step": 0.6842094659805298,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.905434250831604,
"step": 98
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5618983442433576,
"calib/avg_num_step_conf": 1.66015625,
"calib/ece": 0.1394718342848777,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010118537296120445,
"calib/mean_conf": 0.342750095154443,
"calib/mu_c": 0.3486292452830189,
"calib/mu_w": 0.3385107079868985,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.03162479871175524,
"calib/std_conf": 0.11997389444582301,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1862.0,
"completions/max_terminated_length": 1862.0,
"completions/mean_length": 663.7421875,
"completions/mean_terminated_length": 668.968505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.1056,
"grad_norm": 0.005669788923114538,
"learning_rate": 2.805555555555556e-06,
"loss": 0.001,
"num_tokens": 19984019.0,
"reward": 1.3039848804473877,
"reward_std": 0.32023513317108154,
"rewards/accuracy_reward_step": 0.4140625,
"rewards/final_brier_reward_step": 0.7289392352104187,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9205611944198608,
"step": 99
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5417953667953668,
"calib/avg_num_step_conf": 1.796875,
"calib/ece": 0.2451225296442688,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0061968468468468485,
"calib/mean_conf": 0.3536916996047431,
"calib/mu_c": 0.35626351351351354,
"calib/mu_w": 0.3500666666666667,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.006916996047430832,
"calib/std_conf": 0.11269830923059786,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2582.0,
"completions/max_terminated_length": 2582.0,
"completions/mean_length": 647.5546875,
"completions/mean_terminated_length": 647.5546875,
"completions/min_length": 189.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.005725045222789049,
"learning_rate": 2.7777777777777783e-06,
"loss": 0.0159,
"num_tokens": 20206769.0,
"reward": 1.4439170360565186,
"reward_std": 0.25288286805152893,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.6800872087478638,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9264310598373413,
"step": 100
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4667576283800546,
"calib/avg_num_step_conf": 1.6953125,
"calib/ece": 0.1910352941176471,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0248276482262465,
"calib/mean_conf": 0.3417490196078431,
"calib/mu_c": 0.3282155172413794,
"calib/mu_w": 0.3530431654676259,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.03894117647058824,
"calib/std_conf": 0.10193992727851357,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2630.0,
"completions/max_terminated_length": 2630.0,
"completions/mean_length": 653.72265625,
"completions/mean_terminated_length": 653.72265625,
"completions/min_length": 244.0,
"completions/min_terminated_length": 244.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.00673833629116416,
"learning_rate": 2.7500000000000004e-06,
"loss": -0.0,
"num_tokens": 20430682.0,
"reward": 1.3407317399978638,
"reward_std": 0.2800105810165405,
"rewards/accuracy_reward_step": 0.453125,
"rewards/final_brier_reward_step": 0.7115739583969116,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9366538524627686,
"step": 101
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5668928571428571,
"calib/avg_num_step_conf": 1.64453125,
"calib/ece": 0.33908392156862743,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010896642857142869,
"calib/mean_conf": 0.34719058823529414,
"calib/mu_c": 0.35060914285714284,
"calib/mu_w": 0.3397125,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0,
"calib/std_conf": 0.104142325640683,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1304.0,
"completions/max_terminated_length": 1304.0,
"completions/mean_length": 515.89453125,
"completions/mean_terminated_length": 517.9176635742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.1088,
"grad_norm": 0.007361991330981255,
"learning_rate": 2.7222222222222224e-06,
"loss": 0.01,
"num_tokens": 20619015.0,
"reward": 1.541640043258667,
"reward_std": 0.17958354949951172,
"rewards/accuracy_reward_step": 0.6875,
"rewards/final_brier_reward_step": 0.6573126316070557,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9113100171089172,
"step": 102
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5523360867320599,
"calib/avg_num_step_conf": 1.66015625,
"calib/ece": 0.23990632411067192,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.008410286525554866,
"calib/mean_conf": 0.36594347826086954,
"calib/mu_c": 0.3694006711409395,
"calib/mu_w": 0.36099038461538463,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.008458498023715419,
"calib/std_conf": 0.1126632795205633,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2921.0,
"completions/max_terminated_length": 2921.0,
"completions/mean_length": 676.81640625,
"completions/mean_terminated_length": 679.4706420898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.005382324568927288,
"learning_rate": 2.6944444444444444e-06,
"loss": 0.0548,
"num_tokens": 20846400.0,
"reward": 1.440127968788147,
"reward_std": 0.23319774866104126,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.6840233206748962,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8924654722213745,
"step": 103
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5241526717557251,
"calib/avg_num_step_conf": 1.62109375,
"calib/ece": 0.1861375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0061585954198473125,
"calib/mean_conf": 0.36055546875,
"calib/mu_c": 0.357404,
"calib/mu_w": 0.3635625954198473,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.029205859375000007,
"calib/std_conf": 0.13196414570383416,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1505.0,
"completions/max_terminated_length": 1505.0,
"completions/mean_length": 586.37109375,
"completions/mean_terminated_length": 588.6705932617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.006029161624610424,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0231,
"num_tokens": 21052759.0,
"reward": 1.3788419961929321,
"reward_std": 0.2469767928123474,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.7133313417434692,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9355801939964294,
"step": 104
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5728983136169142,
"calib/avg_num_step_conf": 1.46875,
"calib/ece": 0.20540316205533596,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.019087465391392,
"calib/mean_conf": 0.37214624505928856,
"calib/mu_c": 0.3808978102189782,
"calib/mu_w": 0.36181034482758617,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.018023715415019768,
"calib/std_conf": 0.12139836733918537,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2568.0,
"completions/max_terminated_length": 2568.0,
"completions/mean_length": 619.8359375,
"completions/mean_terminated_length": 622.2667236328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 246.0,
"epoch": 0.112,
"grad_norm": 0.006988944485783577,
"learning_rate": 2.6388888888888893e-06,
"loss": 0.0399,
"num_tokens": 21266765.0,
"reward": 1.396331548690796,
"reward_std": 0.281770795583725,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.6915109753608704,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.8882417678833008,
"step": 105
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.549142892892893,
"calib/avg_num_step_conf": 1.484375,
"calib/ece": 0.19350313725490204,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.013794650900900873,
"calib/mean_conf": 0.4202223529411765,
"calib/mu_c": 0.4262270833333333,
"calib/mu_w": 0.41243243243243244,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.024509803921568672,
"calib/std_conf": 0.14519558523224546,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1682.0,
"completions/max_terminated_length": 1682.0,
"completions/mean_length": 577.1171875,
"completions/mean_terminated_length": 579.3804321289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 231.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.0071154567413032055,
"learning_rate": 2.6111111111111113e-06,
"loss": -0.0393,
"num_tokens": 21468659.0,
"reward": 1.4477204084396362,
"reward_std": 0.2633710205554962,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.7140500545501709,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9221565127372742,
"step": 106
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5438095238095237,
"calib/avg_num_step_conf": 1.5234375,
"calib/ece": 0.16790849673202618,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.023317460317460292,
"calib/mean_conf": 0.4320653594771241,
"calib/mu_c": 0.4416666666666666,
"calib/mu_w": 0.4183492063492063,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.005869281045751641,
"calib/std_conf": 0.1471456975062049,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2762.0,
"completions/max_terminated_length": 2762.0,
"completions/mean_length": 572.26953125,
"completions/mean_terminated_length": 572.26953125,
"completions/min_length": 198.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.008440433070063591,
"learning_rate": 2.5833333333333337e-06,
"loss": 0.0671,
"num_tokens": 21669344.0,
"reward": 1.4780728816986084,
"reward_std": 0.2975276708602905,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.7202157974243164,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9327974319458008,
"step": 107
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5260141093474426,
"calib/avg_num_step_conf": 1.4609375,
"calib/ece": 0.30506797385620915,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.017657206990540353,
"calib/mean_conf": 0.44748104575163394,
"calib/mu_c": 0.45205114638447974,
"calib/mu_w": 0.4343939393939394,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.005686274509803921,
"calib/std_conf": 0.15370237052146365,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2831.0,
"completions/max_terminated_length": 2831.0,
"completions/mean_length": 637.484375,
"completions/mean_terminated_length": 637.484375,
"completions/min_length": 166.0,
"completions/min_terminated_length": 166.0,
"epoch": 0.1152,
"grad_norm": 0.006027248688042164,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0228,
"num_tokens": 21885340.0,
"reward": 1.6121673583984375,
"reward_std": 0.2332625687122345,
"rewards/accuracy_reward_step": 0.73828125,
"rewards/final_brier_reward_step": 0.702305018901825,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.895621657371521,
"step": 108
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5266529183852018,
"calib/avg_num_step_conf": 1.328125,
"calib/ece": 0.15022529644268773,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00822453443319593,
"calib/mean_conf": 0.46661264822134385,
"calib/mu_c": 0.47070866141732287,
"calib/mu_w": 0.46248412698412694,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.057430830039525704,
"calib/std_conf": 0.15664296958074397,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1922.0,
"completions/max_terminated_length": 1922.0,
"completions/mean_length": 589.1328125,
"completions/mean_terminated_length": 593.7716674804688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 244.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.0065979138016700745,
"learning_rate": 2.5277777777777778e-06,
"loss": -0.0103,
"num_tokens": 22090326.0,
"reward": 1.3793652057647705,
"reward_std": 0.21767696738243103,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.7197933793067932,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9075617790222168,
"step": 109
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5017532467532467,
"calib/avg_num_step_conf": 1.31640625,
"calib/ece": 0.1660236220472441,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0028753246753246975,
"calib/mean_conf": 0.49854330708661415,
"calib/mu_c": 0.4996753246753247,
"calib/mu_w": 0.4968,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.029133858267716542,
"calib/std_conf": 0.1583895455025244,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1559.0,
"completions/max_terminated_length": 1559.0,
"completions/mean_length": 549.453125,
"completions/mean_terminated_length": 553.779541015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 122.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.006974409334361553,
"learning_rate": 2.5e-06,
"loss": -0.0105,
"num_tokens": 22285474.0,
"reward": 1.4861176013946533,
"reward_std": 0.3244120478630066,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.7152194976806641,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9265310168266296,
"step": 110
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5258145363408521,
"calib/avg_num_step_conf": 1.24609375,
"calib/ece": 0.13496062992125984,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01481954887218051,
"calib/mean_conf": 0.5160629921259842,
"calib/mu_c": 0.5227142857142858,
"calib/mu_w": 0.5078947368421053,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.049921259842519695,
"calib/std_conf": 0.1563893912683447,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2762.0,
"completions/max_terminated_length": 2762.0,
"completions/mean_length": 578.85546875,
"completions/mean_terminated_length": 581.1255493164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 201.0,
"epoch": 0.1184,
"grad_norm": 0.008550817146897316,
"learning_rate": 2.4722222222222226e-06,
"loss": 0.0042,
"num_tokens": 22490637.0,
"reward": 1.4284076690673828,
"reward_std": 0.3240845203399658,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.7224196195602417,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9000416994094849,
"step": 111
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5529933196300103,
"calib/avg_num_step_conf": 1.23046875,
"calib/ece": 0.11254980079681276,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03227774922918819,
"calib/mean_conf": 0.4994820717131474,
"calib/mu_c": 0.513884892086331,
"calib/mu_w": 0.48160714285714284,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.029123505976095664,
"calib/std_conf": 0.1548014107500639,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2952.0,
"completions/max_terminated_length": 2952.0,
"completions/mean_length": 628.75390625,
"completions/mean_terminated_length": 631.2196655273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.005662142299115658,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.048,
"num_tokens": 22709086.0,
"reward": 1.4255472421646118,
"reward_std": 0.3332613706588745,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.7239608764648438,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8948922753334045,
"step": 112
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4897715539494063,
"calib/avg_num_step_conf": 1.16015625,
"calib/ece": 0.16262845849802376,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.006113835828600811,
"calib/mean_conf": 0.5369762845849803,
"calib/mu_c": 0.5344630872483223,
"calib/mu_w": 0.5405769230769231,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0553359683794467,
"calib/std_conf": 0.14319907723921993,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2745.0,
"completions/max_terminated_length": 2745.0,
"completions/mean_length": 541.96875,
"completions/mean_terminated_length": 544.0941772460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 211.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.008202227763831615,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.0357,
"num_tokens": 22902598.0,
"reward": 1.4649397134780884,
"reward_std": 0.30794620513916016,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.7202385663986206,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8942817449569702,
"step": 113
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5605185185185186,
"calib/avg_num_step_conf": 1.1015625,
"calib/ece": 0.1693333333333333,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03458888888888889,
"calib/mean_conf": 0.5365490196078431,
"calib/mu_c": 0.5467222222222222,
"calib/mu_w": 0.5121333333333333,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.0,
"calib/std_conf": 0.13868801406082962,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2582.0,
"completions/max_terminated_length": 2582.0,
"completions/mean_length": 537.8203125,
"completions/mean_terminated_length": 537.8203125,
"completions/min_length": 193.0,
"completions/min_terminated_length": 193.0,
"epoch": 0.1216,
"grad_norm": 0.010839417576789856,
"learning_rate": 2.388888888888889e-06,
"loss": 0.0086,
"num_tokens": 23094872.0,
"reward": 1.5901761054992676,
"reward_std": 0.2540254592895508,
"rewards/accuracy_reward_step": 0.703125,
"rewards/final_brier_reward_step": 0.7430562376976013,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.8855295777320862,
"step": 114
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4970914041710502,
"calib/avg_num_step_conf": 1.0703125,
"calib/ece": 0.11390625000000007,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.004249025310972154,
"calib/mean_conf": 0.550546875,
"calib/mu_c": 0.5486713286713287,
"calib/mu_w": 0.5529203539823009,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05292968750000006,
"calib/std_conf": 0.14277554824876132,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1102.0,
"completions/max_terminated_length": 1102.0,
"completions/mean_length": 506.5234375,
"completions/mean_terminated_length": 508.50982666015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.00830326322466135,
"learning_rate": 2.361111111111111e-06,
"loss": 0.0223,
"num_tokens": 23279374.0,
"reward": 1.4463419914245605,
"reward_std": 0.30777207016944885,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.7279585599899292,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.899763286113739,
"step": 115
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4285850738665308,
"calib/avg_num_step_conf": 1.015625,
"calib/ece": 0.17273333333333335,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.018333036169128736,
"calib/mean_conf": 0.583086274509804,
"calib/mu_c": 0.5756092715231789,
"calib/mu_w": 0.5939423076923076,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.08183137254901961,
"calib/std_conf": 0.12283162558145794,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1589.0,
"completions/max_terminated_length": 1589.0,
"completions/mean_length": 570.51953125,
"completions/mean_terminated_length": 572.7568969726562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 181.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.006899895146489143,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0239,
"num_tokens": 23479515.0,
"reward": 1.4818896055221558,
"reward_std": 0.2726801335811615,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.7259652614593506,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.914690375328064,
"step": 116
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5177538461538462,
"calib/avg_num_step_conf": 1.05078125,
"calib/ece": 0.10890196078431374,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.018716923076923186,
"calib/mean_conf": 0.5710980392156862,
"calib/mu_c": 0.5806399999999999,
"calib/mu_w": 0.5619230769230767,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.09490196078431376,
"calib/std_conf": 0.13763966238995937,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1486.0,
"completions/max_terminated_length": 1486.0,
"completions/mean_length": 537.26171875,
"completions/mean_terminated_length": 539.36865234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.1248,
"grad_norm": 0.010562032461166382,
"learning_rate": 2.305555555555556e-06,
"loss": 0.0179,
"num_tokens": 23673222.0,
"reward": 1.3744449615478516,
"reward_std": 0.3122226893901825,
"rewards/accuracy_reward_step": 0.48828125,
"rewards/final_brier_reward_step": 0.7230820655822754,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9172408580780029,
"step": 117
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5072554996024383,
"calib/avg_num_step_conf": 1.01171875,
"calib/ece": 0.08763492063492065,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01569202226345079,
"calib/mean_conf": 0.6142222222222222,
"calib/mu_c": 0.6203246753246753,
"calib/mu_w": 0.6046326530612245,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.045373015873015884,
"calib/std_conf": 0.12737402048092236,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2454.0,
"completions/max_terminated_length": 2454.0,
"completions/mean_length": 546.9609375,
"completions/mean_terminated_length": 551.2677001953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.008186553604900837,
"learning_rate": 2.277777777777778e-06,
"loss": -0.0255,
"num_tokens": 23866820.0,
"reward": 1.4868626594543457,
"reward_std": 0.28711938858032227,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.7340095043182373,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.9059938192367554,
"step": 118
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4986163522012579,
"calib/avg_num_step_conf": 1.015625,
"calib/ece": 0.10252734374999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.013483647798742227,
"calib/mean_conf": 0.6026835937499999,
"calib/mu_c": 0.6082666666666667,
"calib/mu_w": 0.5947830188679245,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.05963671874999997,
"calib/std_conf": 0.1323059512298482,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1792.0,
"completions/max_terminated_length": 1792.0,
"completions/mean_length": 580.6953125,
"completions/mean_terminated_length": 582.9725952148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 152.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.006644046399742365,
"learning_rate": 2.25e-06,
"loss": 0.036,
"num_tokens": 24070110.0,
"reward": 1.4794600009918213,
"reward_std": 0.3241386413574219,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.735337495803833,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9174776077270508,
"step": 119
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6072461817937159,
"calib/avg_num_step_conf": 1.0703125,
"calib/ece": 0.06495312499999994,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0679617842965754,
"calib/mean_conf": 0.5873906249999999,
"calib/mu_c": 0.6110179640718564,
"calib/mu_w": 0.543056179775281,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.14516169510104718,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1554.0,
"completions/max_terminated_length": 1554.0,
"completions/mean_length": 516.69140625,
"completions/mean_terminated_length": 518.7176513671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.128,
"grad_norm": 0.009012932889163494,
"learning_rate": 2.222222222222222e-06,
"loss": -0.0092,
"num_tokens": 24258639.0,
"reward": 1.5737022161483765,
"reward_std": 0.2422880381345749,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.7758143544197083,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9384927749633789,
"step": 120
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5292538915727322,
"calib/avg_num_step_conf": 1.04296875,
"calib/ece": 0.042073622047244165,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.006989828234031048,
"calib/mean_conf": 0.6238059055118111,
"calib/mu_c": 0.6263376543209876,
"calib/mu_w": 0.6193478260869566,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.01404212598425204,
"calib/std_conf": 0.107397666383644,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2254.0,
"completions/max_terminated_length": 2254.0,
"completions/mean_length": 584.8203125,
"completions/mean_terminated_length": 589.4251708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.009193326346576214,
"learning_rate": 2.1944444444444445e-06,
"loss": 0.0392,
"num_tokens": 24462977.0,
"reward": 1.533304214477539,
"reward_std": 0.3090783357620239,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.7494633197784424,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9217903017997742,
"step": 121
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5856741573033708,
"calib/avg_num_step_conf": 1.0546875,
"calib/ece": 0.09127865612648213,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.08722732255412435,
"calib/mean_conf": 0.6137055335968379,
"calib/mu_c": 0.644390243902439,
"calib/mu_w": 0.5571629213483147,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.028381422924901187,
"calib/std_conf": 0.13510463390930255,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2641.0,
"completions/max_terminated_length": 2641.0,
"completions/mean_length": 572.28125,
"completions/mean_terminated_length": 579.0671997070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.010109186172485352,
"learning_rate": 2.166666666666667e-06,
"loss": -0.0212,
"num_tokens": 24666393.0,
"reward": 1.5562331676483154,
"reward_std": 0.3059636354446411,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.783021092414856,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.910453200340271,
"step": 122
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5110211869982891,
"calib/avg_num_step_conf": 1.015625,
"calib/ece": 0.09377290836653387,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.005591064613765018,
"calib/mean_conf": 0.630211155378486,
"calib/mu_c": 0.6324832214765101,
"calib/mu_w": 0.6268921568627451,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.06517928286852591,
"calib/std_conf": 0.11383390795653611,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1743.0,
"completions/max_terminated_length": 1743.0,
"completions/mean_length": 615.42578125,
"completions/mean_terminated_length": 625.1944580078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.1312,
"grad_norm": 0.011224367655813694,
"learning_rate": 2.138888888888889e-06,
"loss": -0.0461,
"num_tokens": 24878798.0,
"reward": 1.4726450443267822,
"reward_std": 0.3578389883041382,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.7325735092163086,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9051204919815063,
"step": 123
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5085185185185185,
"calib/avg_num_step_conf": 1.0703125,
"calib/ece": 0.07579098039215706,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0038219191919192097,
"calib/mean_conf": 0.6290458823529413,
"calib/mu_c": 0.6276969696969698,
"calib/mu_w": 0.631518888888889,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.028889019607843347,
"calib/std_conf": 0.12206579314285393,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1656.0,
"completions/max_terminated_length": 1656.0,
"completions/mean_length": 566.50390625,
"completions/mean_terminated_length": 568.7255249023438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.010063174180686474,
"learning_rate": 2.1111111111111114e-06,
"loss": -0.0046,
"num_tokens": 25080207.0,
"reward": 1.5498743057250977,
"reward_std": 0.2400267869234085,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/final_brier_reward_step": 0.7517082691192627,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9226433038711548,
"step": 124
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5515110334562868,
"calib/avg_num_step_conf": 1.0390625,
"calib/ece": 0.0782782283464567,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03312156150909196,
"calib/mean_conf": 0.6342152362204725,
"calib/mu_c": 0.6473856209150327,
"calib/mu_w": 0.6142640594059408,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.055065629921259865,
"calib/std_conf": 0.12405846288079574,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2065.0,
"completions/max_terminated_length": 2065.0,
"completions/mean_length": 560.87109375,
"completions/mean_terminated_length": 565.2874145507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.010898683220148087,
"learning_rate": 2.0833333333333334e-06,
"loss": -0.0144,
"num_tokens": 25278166.0,
"reward": 1.5032649040222168,
"reward_std": 0.30852293968200684,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.754002571105957,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9238045811653137,
"step": 125
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5038186745503819,
"calib/avg_num_step_conf": 1.0234375,
"calib/ece": 0.17431372549019622,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.00893754619364373,
"calib/mean_conf": 0.6475686274509805,
"calib/mu_c": 0.6521951219512196,
"calib/mu_w": 0.6432575757575759,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.16976470588235307,
"calib/std_conf": 0.09208462750973315,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2794.0,
"completions/max_terminated_length": 2794.0,
"completions/mean_length": 559.76953125,
"completions/mean_terminated_length": 559.76953125,
"completions/min_length": 115.0,
"completions/min_terminated_length": 115.0,
"epoch": 0.1344,
"grad_norm": 0.008297329768538475,
"learning_rate": 2.0555555555555555e-06,
"loss": 0.0476,
"num_tokens": 25476499.0,
"reward": 1.361807107925415,
"reward_std": 0.25627726316452026,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.7161902189254761,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8976604342460632,
"step": 126
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5173343605546995,
"calib/avg_num_step_conf": 0.984375,
"calib/ece": 0.1359468000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02282682331792507,
"calib/mean_conf": 0.6373068000000001,
"calib/mu_c": 0.6480810606060606,
"calib/mu_w": 0.6252542372881356,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.9765625,
"calib/nonempty_step_conf_rate": 0.97265625,
"calib/pce": 0.12262680000000008,
"calib/std_conf": 0.11726038646431285,
"calib/step_conf_rate": 0.97265625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2954.0,
"completions/max_terminated_length": 2954.0,
"completions/mean_length": 525.640625,
"completions/mean_terminated_length": 533.984130859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.011081523261964321,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0004,
"num_tokens": 25664303.0,
"reward": 1.3940820693969727,
"reward_std": 0.3191128671169281,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.7170500755310059,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.9125405550003052,
"step": 127
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.4591626409017713,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.13092000000000004,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01260869565217404,
"calib/mean_conf": 0.6562,
"calib/mu_c": 0.662,
"calib/mu_w": 0.649391304347826,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.12356000000000004,
"calib/std_conf": 0.10301242643487242,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2674.0,
"completions/max_terminated_length": 2674.0,
"completions/mean_length": 587.64453125,
"completions/mean_terminated_length": 599.3505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 190.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.007573532871901989,
"learning_rate": 2.0000000000000003e-06,
"loss": -0.0376,
"num_tokens": 25870972.0,
"reward": 1.4005755186080933,
"reward_std": 0.3928867280483246,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.716552734375,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8879466652870178,
"step": 128
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.532983165861934,
"calib/avg_num_step_conf": 1.01953125,
"calib/ece": 0.06474509803921571,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.027578624559572162,
"calib/mean_conf": 0.6372941176470588,
"calib/mu_c": 0.6477848101265824,
"calib/mu_w": 0.6202061855670102,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.04121568627450983,
"calib/std_conf": 0.11884794934468638,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1536.0,
"completions/mean_length": 498.01171875,
"completions/mean_terminated_length": 499.9647216796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.1376,
"grad_norm": 0.010780357755720615,
"learning_rate": 1.9722222222222224e-06,
"loss": 0.0086,
"num_tokens": 26050415.0,
"reward": 1.526939868927002,
"reward_std": 0.26041966676712036,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.7569589614868164,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9344668984413147,
"step": 129
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.535741935483871,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.04375176470588247,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.041687838709677294,
"calib/mean_conf": 0.6490066666666668,
"calib/mu_c": 0.6653548387096774,
"calib/mu_w": 0.6236670000000001,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.04245764705882365,
"calib/std_conf": 0.1050140472147336,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1662.0,
"completions/max_terminated_length": 1662.0,
"completions/mean_length": 487.39453125,
"completions/mean_terminated_length": 489.305908203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 156.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.007861044257879257,
"learning_rate": 1.944444444444445e-06,
"loss": -0.028,
"num_tokens": 26230044.0,
"reward": 1.5061434507369995,
"reward_std": 0.20071539282798767,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.759964108467102,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8968331813812256,
"step": 130
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5314141217661934,
"calib/avg_num_step_conf": 1.03515625,
"calib/ece": 0.26455729166666664,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.026790723810758754,
"calib/mean_conf": 0.6434635416666666,
"calib/mu_c": 0.6601030927835052,
"calib/mu_w": 0.6333123689727465,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26455729166666664,
"calib/std_conf": 0.11376442395099201,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1332.0,
"completions/max_terminated_length": 1332.0,
"completions/mean_length": 488.61328125,
"completions/mean_terminated_length": 490.5294494628906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 235.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.008661036379635334,
"learning_rate": 1.916666666666667e-06,
"loss": 0.022,
"num_tokens": 26410905.0,
"reward": 1.2555246353149414,
"reward_std": 0.19447124004364014,
"rewards/accuracy_reward_step": 0.37890625,
"rewards/final_brier_reward_step": 0.6908596158027649,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9294419288635254,
"step": 131
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49281045751633984,
"calib/avg_num_step_conf": 1.02734375,
"calib/ece": 0.07675889328063243,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.04155882352941187,
"calib/mean_conf": 0.6406324110671936,
"calib/mu_c": 0.6570588235294118,
"calib/mu_w": 0.6154999999999999,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.05632411067193677,
"calib/std_conf": 0.11819623955263713,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1451.0,
"completions/max_terminated_length": 1451.0,
"completions/mean_length": 534.12890625,
"completions/mean_terminated_length": 536.2235717773438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 153.0,
"epoch": 0.1408,
"grad_norm": 0.010745419189333916,
"learning_rate": 1.888888888888889e-06,
"loss": 0.008,
"num_tokens": 26602802.0,
"reward": 1.4952141046524048,
"reward_std": 0.36359143257141113,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.7531276941299438,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9027261734008789,
"step": 132
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5754966460268318,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.20268000000000014,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03622420020639827,
"calib/mean_conf": 0.65468,
"calib/mu_c": 0.6743859649122806,
"calib/mu_w": 0.6381617647058824,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.20068000000000014,
"calib/std_conf": 0.10599291297063215,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1770.0,
"completions/max_terminated_length": 1770.0,
"completions/mean_length": 580.69140625,
"completions/mean_terminated_length": 592.2589721679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.008619986474514008,
"learning_rate": 1.8611111111111113e-06,
"loss": -0.0055,
"num_tokens": 26807371.0,
"reward": 1.306413173675537,
"reward_std": 0.37664172053337097,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.6880574226379395,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.9151625037193298,
"step": 133
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5193303301284207,
"calib/avg_num_step_conf": 0.9765625,
"calib/ece": 0.09009349593495942,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02689995293484837,
"calib/mean_conf": 0.6551341463414635,
"calib/mu_c": 0.6668345323741007,
"calib/mu_w": 0.6399345794392524,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.96484375,
"calib/nonempty_step_conf_rate": 0.9609375,
"calib/pce": 0.09009349593495942,
"calib/std_conf": 0.08956091112481515,
"calib/step_conf_rate": 0.9609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2625.0,
"completions/max_terminated_length": 2625.0,
"completions/mean_length": 603.98828125,
"completions/mean_terminated_length": 611.1502075195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 177.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.007842494174838066,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0022,
"num_tokens": 27020512.0,
"reward": 1.4165414571762085,
"reward_std": 0.3975376486778259,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.7219664454460144,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.8972331285476685,
"step": 134
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5355844330729868,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.03385039682539689,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.031358241314301094,
"calib/mean_conf": 0.6608345238095239,
"calib/mu_c": 0.6725316455696204,
"calib/mu_w": 0.6411734042553193,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.03385039682539689,
"calib/std_conf": 0.07078657811647564,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1919.0,
"completions/max_terminated_length": 1919.0,
"completions/mean_length": 526.890625,
"completions/mean_terminated_length": 531.0393676757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 200.0,
"epoch": 0.144,
"grad_norm": 0.008880026638507843,
"learning_rate": 1.8055555555555557e-06,
"loss": 0.0014,
"num_tokens": 27210844.0,
"reward": 1.5298209190368652,
"reward_std": 0.2332524210214615,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.7603797912597656,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9375864267349243,
"step": 135
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5934523809523811,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.13391304347826086,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.003952569169960474,
"calib/gap": 0.07988095238095239,
"calib/mean_conf": 0.5921739130434783,
"calib/mu_c": 0.6341666666666667,
"calib/mu_w": 0.5542857142857143,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.1258893280632411,
"calib/std_conf": 0.16718199531236863,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2463.0,
"completions/max_terminated_length": 2463.0,
"completions/mean_length": 549.484375,
"completions/mean_terminated_length": 549.484375,
"completions/min_length": 159.0,
"completions/min_terminated_length": 159.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.006931147072464228,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0455,
"num_tokens": 27409568.0,
"reward": 1.3625285625457764,
"reward_std": 0.28099530935287476,
"rewards/accuracy_reward_step": 0.46875,
"rewards/final_brier_reward_step": 0.7359734177589417,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.921917200088501,
"step": 136
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5247395833333334,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.05992063492063495,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.011698717948717907,
"calib/mean_conf": 0.6551587301587303,
"calib/mu_c": 0.6596153846153846,
"calib/mu_w": 0.6479166666666667,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.048015873015873047,
"calib/std_conf": 0.08016089105998236,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1667.0,
"completions/max_terminated_length": 1667.0,
"completions/mean_length": 513.90625,
"completions/mean_terminated_length": 522.0635375976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.00839358102530241,
"learning_rate": 1.75e-06,
"loss": -0.059,
"num_tokens": 27597680.0,
"reward": 1.5042455196380615,
"reward_std": 0.3065860867500305,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.746573805809021,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9097723960876465,
"step": 137
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5489845938375351,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.036059288537549423,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03486099439775914,
"calib/mean_conf": 0.6279723320158103,
"calib/mu_c": 0.6396845238095238,
"calib/mu_w": 0.6048235294117646,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.12983462887471897,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1324.0,
"completions/max_terminated_length": 1324.0,
"completions/mean_length": 526.61328125,
"completions/mean_terminated_length": 532.8577270507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 103.0,
"epoch": 0.1472,
"grad_norm": 0.007144542410969734,
"learning_rate": 1.7222222222222224e-06,
"loss": -0.0157,
"num_tokens": 27786397.0,
"reward": 1.565363883972168,
"reward_std": 0.2977532148361206,
"rewards/accuracy_reward_step": 0.65625,
"rewards/final_brier_reward_step": 0.7652297019958496,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9200586080551147,
"step": 138
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5472686199342827,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.014921259842519755,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03043948521358164,
"calib/mean_conf": 0.6531889763779528,
"calib/mu_c": 0.6637349397590362,
"calib/mu_w": 0.6332954545454546,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.007283464566929203,
"calib/std_conf": 0.08604880800333448,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2195.0,
"completions/max_terminated_length": 2195.0,
"completions/mean_length": 491.1875,
"completions/mean_terminated_length": 493.11376953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 143.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.010381989181041718,
"learning_rate": 1.6944444444444446e-06,
"loss": 0.0109,
"num_tokens": 27964805.0,
"reward": 1.560642957687378,
"reward_std": 0.23459778726100922,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.7682285308837891,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.915489673614502,
"step": 139
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5138813282525857,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.030941176470588257,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.014763881328252682,
"calib/mean_conf": 0.6525098039215687,
"calib/mu_c": 0.6576047904191618,
"calib/mu_w": 0.6428409090909091,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.01427450980392159,
"calib/std_conf": 0.10476104884384996,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1629.0,
"completions/max_terminated_length": 1629.0,
"completions/mean_length": 495.05078125,
"completions/mean_terminated_length": 496.9921875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 131.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.010000730864703655,
"learning_rate": 1.6666666666666667e-06,
"loss": -0.0318,
"num_tokens": 28146122.0,
"reward": 1.5590664148330688,
"reward_std": 0.27282488346099854,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.7630187273025513,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9102286100387573,
"step": 140
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5237432112519149,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.04509881422924892,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02759086478206385,
"calib/mean_conf": 0.6377470355731226,
"calib/mu_c": 0.6471257485029941,
"calib/mu_w": 0.6195348837209302,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.011383399209486167,
"calib/std_conf": 0.12127478044768873,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2179.0,
"completions/max_terminated_length": 2179.0,
"completions/mean_length": 549.09765625,
"completions/mean_terminated_length": 551.2510375976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 199.0,
"epoch": 0.1504,
"grad_norm": 0.007173791527748108,
"learning_rate": 1.638888888888889e-06,
"loss": 0.0293,
"num_tokens": 28343355.0,
"reward": 1.5514299869537354,
"reward_std": 0.2732362151145935,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.7581105828285217,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9035614132881165,
"step": 141
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5766574159728647,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.09984375000000009,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0523860622880048,
"calib/mean_conf": 0.619375,
"calib/mu_c": 0.6429078014184397,
"calib/mu_w": 0.5905217391304349,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.08421875000000009,
"calib/std_conf": 0.1368150553667249,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1569.0,
"completions/max_terminated_length": 1569.0,
"completions/mean_length": 568.1015625,
"completions/mean_terminated_length": 570.3294677734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.011100290343165398,
"learning_rate": 1.6111111111111113e-06,
"loss": -0.0116,
"num_tokens": 28543517.0,
"reward": 1.4611070156097412,
"reward_std": 0.2612740695476532,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.7529253959655762,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.940139889717102,
"step": 142
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5708579484425348,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.04076000000000009,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0664419978517724,
"calib/mean_conf": 0.6035600000000001,
"calib/mu_c": 0.6296052631578948,
"calib/mu_w": 0.5631632653061224,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.98046875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.01816000000000009,
"calib/std_conf": 0.15534904698774307,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2268.0,
"completions/max_terminated_length": 2268.0,
"completions/mean_length": 548.51953125,
"completions/mean_terminated_length": 561.6840209960938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 248.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.010165936313569546,
"learning_rate": 1.5833333333333333e-06,
"loss": -0.0858,
"num_tokens": 28740842.0,
"reward": 1.4874682426452637,
"reward_std": 0.2669368386268616,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.7490015029907227,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.9096827507019043,
"step": 143
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5699814471243043,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.07992063492063484,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.05187532467532463,
"calib/mean_conf": 0.6184920634920635,
"calib/mu_c": 0.6343428571428572,
"calib/mu_w": 0.5824675324675326,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.001984126984126984,
"calib/std_conf": 0.1319034486974533,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 1586.0,
"completions/max_terminated_length": 1586.0,
"completions/mean_length": 511.84765625,
"completions/mean_terminated_length": 515.8779296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 61.0,
"epoch": 0.1536,
"grad_norm": 0.011674090288579464,
"learning_rate": 1.5555555555555558e-06,
"loss": -0.0547,
"num_tokens": 28925571.0,
"reward": 1.585937261581421,
"reward_std": 0.277024507522583,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/final_brier_reward_step": 0.7722121477127075,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.8883872032165527,
"step": 144
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5292712066905616,
"calib/avg_num_step_conf": 1.02734375,
"calib/ece": 0.04329411764705898,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.027363600159299062,
"calib/mean_conf": 0.6145882352941178,
"calib/mu_c": 0.624567901234568,
"calib/mu_w": 0.5972043010752689,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.011294117647058982,
"calib/std_conf": 0.13318322576922959,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2523.0,
"completions/max_terminated_length": 2523.0,
"completions/mean_length": 514.52734375,
"completions/mean_terminated_length": 516.5451049804688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.0095293540507555,
"learning_rate": 1.527777777777778e-06,
"loss": 0.037,
"num_tokens": 29109562.0,
"reward": 1.5433859825134277,
"reward_std": 0.329196572303772,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.7598382830619812,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9273048639297485,
"step": 145
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.578062996031746,
"calib/avg_num_step_conf": 1.04296875,
"calib/ece": 0.1430446194225722,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0636024305555557,
"calib/mean_conf": 0.6351706036745408,
"calib/mu_c": 0.6672222222222224,
"calib/mu_w": 0.6036197916666667,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.14107611548556434,
"calib/std_conf": 0.11527646304810632,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1977.0,
"completions/max_terminated_length": 1977.0,
"completions/mean_length": 532.34375,
"completions/mean_terminated_length": 534.431396484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 168.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.009710113517940044,
"learning_rate": 1.5e-06,
"loss": 0.0232,
"num_tokens": 29302626.0,
"reward": 1.3976036310195923,
"reward_std": 0.2787143588066101,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.7433222532272339,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9443954229354858,
"step": 146
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5880142892338014,
"calib/avg_num_step_conf": 1.0234375,
"calib/ece": 0.1289411764705883,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.08953621581670357,
"calib/mean_conf": 0.6112941176470588,
"calib/mu_c": 0.6576422764227642,
"calib/mu_w": 0.5681060606060606,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1289411764705883,
"calib/std_conf": 0.1503626067039598,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1418.0,
"completions/max_terminated_length": 1418.0,
"completions/mean_length": 543.97265625,
"completions/mean_terminated_length": 546.1058959960938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 169.0,
"epoch": 0.1568,
"grad_norm": 0.010358953848481178,
"learning_rate": 1.4722222222222225e-06,
"loss": 0.0001,
"num_tokens": 29495131.0,
"reward": 1.3876032829284668,
"reward_std": 0.18397116661071777,
"rewards/accuracy_reward_step": 0.48046875,
"rewards/final_brier_reward_step": 0.7528367042541504,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9275525808334351,
"step": 147
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.511323207443897,
"calib/avg_num_step_conf": 1.01953125,
"calib/ece": 0.013333333333333405,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.010843596059113114,
"calib/mean_conf": 0.655764705882353,
"calib/mu_c": 0.6594642857142856,
"calib/mu_w": 0.6486206896551725,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0051372549019608575,
"calib/std_conf": 0.07447662832117177,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2731.0,
"completions/max_terminated_length": 2731.0,
"completions/mean_length": 506.578125,
"completions/mean_terminated_length": 506.578125,
"completions/min_length": 117.0,
"completions/min_terminated_length": 117.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.00780367199331522,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.014,
"num_tokens": 29679495.0,
"reward": 1.567508578300476,
"reward_std": 0.2392946481704712,
"rewards/accuracy_reward_step": 0.65625,
"rewards/final_brier_reward_step": 0.7715179920196533,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9066858291625977,
"step": 148
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5755208333333334,
"calib/avg_num_step_conf": 1.02734375,
"calib/ece": 0.018366015624999975,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.05477905844155839,
"calib/mean_conf": 0.6378839843749999,
"calib/mu_c": 0.6567142857142857,
"calib/mu_w": 0.6019352272727273,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0,
"calib/std_conf": 0.11053841322448618,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1669.0,
"completions/max_terminated_length": 1669.0,
"completions/mean_length": 571.94140625,
"completions/mean_terminated_length": 574.184326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 161.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.007841997779905796,
"learning_rate": 1.4166666666666667e-06,
"loss": 0.058,
"num_tokens": 29879936.0,
"reward": 1.5820075273513794,
"reward_std": 0.2582079768180847,
"rewards/accuracy_reward_step": 0.65625,
"rewards/final_brier_reward_step": 0.7865728139877319,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.929884672164917,
"step": 149
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5208812260536398,
"calib/avg_num_step_conf": 1.01953125,
"calib/ece": 0.15889328063241115,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0028933588761175244,
"calib/mean_conf": 0.6145454545454546,
"calib/mu_c": 0.6133103448275863,
"calib/mu_w": 0.6162037037037038,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.10015810276679851,
"calib/std_conf": 0.14518453198995135,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2568.0,
"completions/max_terminated_length": 2568.0,
"completions/mean_length": 476.0390625,
"completions/mean_terminated_length": 479.78741455078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 154.0,
"epoch": 0.16,
"grad_norm": 0.009207825176417828,
"learning_rate": 1.3888888888888892e-06,
"loss": -0.0237,
"num_tokens": 30056330.0,
"reward": 1.4508132934570312,
"reward_std": 0.2670350670814514,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.7225687503814697,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9065535068511963,
"step": 150
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.555614242217113,
"calib/avg_num_step_conf": 1.01953125,
"calib/ece": 0.1509566929133859,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.042970049089666285,
"calib/mean_conf": 0.6273346456692914,
"calib/mu_c": 0.6498347107438017,
"calib/mu_w": 0.6068646616541354,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.1509566929133859,
"calib/std_conf": 0.1171076050557022,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2029.0,
"completions/max_terminated_length": 2029.0,
"completions/mean_length": 565.8125,
"completions/mean_terminated_length": 570.2677001953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 195.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.008064262568950653,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0237,
"num_tokens": 30257770.0,
"reward": 1.3659863471984863,
"reward_std": 0.2792539596557617,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.7297468781471252,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9232016205787659,
"step": 151
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5315154922538731,
"calib/avg_num_step_conf": 1.05078125,
"calib/ece": 0.1092913385826772,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.021996501749125374,
"calib/mean_conf": 0.6348818897637796,
"calib/mu_c": 0.644927536231884,
"calib/mu_w": 0.6229310344827587,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.10043307086614177,
"calib/std_conf": 0.10816920357085563,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1918.0,
"completions/max_terminated_length": 1918.0,
"completions/mean_length": 531.640625,
"completions/mean_terminated_length": 533.7255249023438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 189.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.008094547316432,
"learning_rate": 1.3333333333333334e-06,
"loss": -0.0085,
"num_tokens": 30448830.0,
"reward": 1.4350159168243408,
"reward_std": 0.3242112696170807,
"rewards/accuracy_reward_step": 0.5390625,
"rewards/final_brier_reward_step": 0.7347495555877686,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9283766150474548,
"step": 152
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5265229736706246,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.08624505928853769,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02604349509550863,
"calib/mean_conf": 0.6175494071146246,
"calib/mu_c": 0.628255033557047,
"calib/mu_w": 0.6022115384615384,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.057430830039525836,
"calib/std_conf": 0.1446145296415369,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2109.0,
"completions/max_terminated_length": 2109.0,
"completions/mean_length": 532.80078125,
"completions/mean_terminated_length": 534.8901977539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.1632,
"grad_norm": 0.006869491655379534,
"learning_rate": 1.3055555555555556e-06,
"loss": -0.0,
"num_tokens": 30642115.0,
"reward": 1.4726076126098633,
"reward_std": 0.2840556502342224,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.7330499887466431,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9196433424949646,
"step": 153
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.515411376953125,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.1566796875000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.023828124999999867,
"calib/mean_conf": 0.6541015625,
"calib/mu_c": 0.666015625,
"calib/mu_w": 0.6421875000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.15539062500000012,
"calib/std_conf": 0.08811820362478229,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1158.0,
"completions/max_terminated_length": 1158.0,
"completions/mean_length": 473.9453125,
"completions/mean_terminated_length": 475.803955078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.010958393104374409,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.0028,
"num_tokens": 30817453.0,
"reward": 1.4038617610931396,
"reward_std": 0.25540071725845337,
"rewards/accuracy_reward_step": 0.5,
"rewards/final_brier_reward_step": 0.7304019331932068,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9546434283256531,
"step": 154
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4685954953305255,
"calib/avg_num_step_conf": 1.03515625,
"calib/ece": 0.14996093750000009,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.021834218397119076,
"calib/mean_conf": 0.6323046875,
"calib/mu_c": 0.6433070866141734,
"calib/mu_w": 0.6214728682170543,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.1430859375000001,
"calib/std_conf": 0.13924377558988893,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1488.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 483.54296875,
"completions/mean_terminated_length": 485.4392395019531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.00791851058602333,
"learning_rate": 1.25e-06,
"loss": 0.0116,
"num_tokens": 30998024.0,
"reward": 1.3909826278686523,
"reward_std": 0.2507619261741638,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.7212804555892944,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9416821002960205,
"step": 155
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5218097300050942,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.08592549019607845,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.033762544574630904,
"calib/mean_conf": 0.6125215686274509,
"calib/mu_c": 0.626291390728477,
"calib/mu_w": 0.592528846153846,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.053145098039215716,
"calib/std_conf": 0.15812955263690356,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1843.0,
"completions/max_terminated_length": 1843.0,
"completions/mean_length": 549.921875,
"completions/mean_terminated_length": 552.0784912109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.1664,
"grad_norm": 0.008392121642827988,
"learning_rate": 1.2222222222222223e-06,
"loss": -0.0058,
"num_tokens": 31193132.0,
"reward": 1.4988298416137695,
"reward_std": 0.20456862449645996,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.7443010210990906,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9410922527313232,
"step": 156
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4828834759710335,
"calib/avg_num_step_conf": 1.02734375,
"calib/ece": 0.04411067193675897,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.020063857801185025,
"calib/mean_conf": 0.6566798418972333,
"calib/mu_c": 0.6644516129032259,
"calib/mu_w": 0.6443877551020408,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.04407114624505937,
"calib/std_conf": 0.08901771967302509,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1223.0,
"completions/max_terminated_length": 1223.0,
"completions/mean_length": 496.69921875,
"completions/mean_terminated_length": 498.6470947265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 114.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.009014573879539967,
"learning_rate": 1.1944444444444446e-06,
"loss": -0.0128,
"num_tokens": 31373583.0,
"reward": 1.505033254623413,
"reward_std": 0.2585407495498657,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.7499347925186157,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9015136957168579,
"step": 157
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.52470703125,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.03218750000000011,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.025124999999999953,
"calib/mean_conf": 0.651328125,
"calib/mu_c": 0.66075,
"calib/mu_w": 0.635625,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.029257812500000112,
"calib/std_conf": 0.0879574745771181,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1595.0,
"completions/max_terminated_length": 1595.0,
"completions/mean_length": 509.56640625,
"completions/mean_terminated_length": 511.5647277832031,
"completions/min_length": 0.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.00884316861629486,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0338,
"num_tokens": 31558840.0,
"reward": 1.542033076286316,
"reward_std": 0.3029077649116516,
"rewards/accuracy_reward_step": 0.625,
"rewards/final_brier_reward_step": 0.768972635269165,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9301871657371521,
"step": 158
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5182214299861359,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.05412698412698415,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.023553178847296352,
"calib/mean_conf": 0.6612698412698413,
"calib/mu_c": 0.6705228758169934,
"calib/mu_w": 0.6469696969696971,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.05412698412698415,
"calib/std_conf": 0.06500285863047514,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1510.0,
"completions/max_terminated_length": 1510.0,
"completions/mean_length": 471.26171875,
"completions/mean_terminated_length": 473.1098327636719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.1696,
"grad_norm": 0.011129575781524181,
"learning_rate": 1.138888888888889e-06,
"loss": 0.0116,
"num_tokens": 31733835.0,
"reward": 1.500402808189392,
"reward_std": 0.2676931321620941,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.7535984516143799,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9225396513938904,
"step": 159
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5519440883843009,
"calib/avg_num_step_conf": 1.0234375,
"calib/ece": 0.15042968750000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.032352438503326475,
"calib/mean_conf": 0.6445703125,
"calib/mu_c": 0.6606201550387597,
"calib/mu_w": 0.6282677165354332,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.14554687500000002,
"calib/std_conf": 0.10163031840278935,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1552.0,
"completions/max_terminated_length": 1552.0,
"completions/mean_length": 495.98828125,
"completions/mean_terminated_length": 497.933349609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.008333866484463215,
"learning_rate": 1.111111111111111e-06,
"loss": 0.004,
"num_tokens": 31915216.0,
"reward": 1.4053471088409424,
"reward_std": 0.2977757155895233,
"rewards/accuracy_reward_step": 0.50390625,
"rewards/final_brier_reward_step": 0.7360754013061523,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9336129426956177,
"step": 160
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5120133714046939,
"calib/avg_num_step_conf": 1.02734375,
"calib/ece": 0.03992187500000008,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0011908907305523408,
"calib/mean_conf": 0.641484375,
"calib/mu_c": 0.6410982658959538,
"calib/mu_w": 0.6422891566265061,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.002812500000000077,
"calib/std_conf": 0.10969125594530942,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1505.0,
"completions/max_terminated_length": 1505.0,
"completions/mean_length": 476.921875,
"completions/mean_terminated_length": 478.79217529296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 144.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.007851874455809593,
"learning_rate": 1.0833333333333335e-06,
"loss": 0.0153,
"num_tokens": 32090796.0,
"reward": 1.5846412181854248,
"reward_std": 0.2483762502670288,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/final_brier_reward_step": 0.7650160193443298,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9100950956344604,
"step": 161
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5350936329588014,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.0473122529644268,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.023658426966292168,
"calib/mean_conf": 0.6562450592885377,
"calib/mu_c": 0.6632584269662921,
"calib/mu_w": 0.6396,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.0,
"calib/std_conf": 0.07882869820921898,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1624.0,
"completions/max_terminated_length": 1624.0,
"completions/mean_length": 464.62890625,
"completions/mean_terminated_length": 466.4510192871094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 135.0,
"epoch": 0.1728,
"grad_norm": 0.008597790263593197,
"learning_rate": 1.0555555555555557e-06,
"loss": 0.0053,
"num_tokens": 32263453.0,
"reward": 1.608614444732666,
"reward_std": 0.2592753767967224,
"rewards/accuracy_reward_step": 0.6953125,
"rewards/final_brier_reward_step": 0.78007972240448,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9117984771728516,
"step": 162
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5342346812205407,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.10692823529411782,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.036115647482014346,
"calib/mean_conf": 0.640261568627451,
"calib/mu_c": 0.6566906474820144,
"calib/mu_w": 0.6205750000000001,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.10104588235294135,
"calib/std_conf": 0.1102153941664453,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2483.0,
"completions/max_terminated_length": 2483.0,
"completions/mean_length": 570.4296875,
"completions/mean_terminated_length": 572.6666870117188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 129.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.008555339649319649,
"learning_rate": 1.0277777777777777e-06,
"loss": -0.0165,
"num_tokens": 32463883.0,
"reward": 1.445723056793213,
"reward_std": 0.30785322189331055,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.7436636686325073,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9330652356147766,
"step": 163
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5751426958539798,
"calib/avg_num_step_conf": 1.01953125,
"calib/ece": 0.08320312500000002,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.04292667628426272,
"calib/mean_conf": 0.6450781250000001,
"calib/mu_c": 0.6630201342281881,
"calib/mu_w": 0.6200934579439253,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.07312500000000001,
"calib/std_conf": 0.09925500690889288,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1440.0,
"completions/max_terminated_length": 1440.0,
"completions/mean_length": 571.4375,
"completions/mean_terminated_length": 573.678466796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 216.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.008878960274159908,
"learning_rate": 1.0000000000000002e-06,
"loss": -0.0092,
"num_tokens": 32665875.0,
"reward": 1.499582290649414,
"reward_std": 0.28364017605781555,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.7637882828712463,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9426275491714478,
"step": 164
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5401851851851852,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.1142352941176471,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03287037037037055,
"calib/mean_conf": 0.6382352941176471,
"calib/mu_c": 0.6537037037037039,
"calib/mu_w": 0.6208333333333333,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11152941176470593,
"calib/std_conf": 0.1090836319267252,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1636.0,
"completions/max_terminated_length": 1636.0,
"completions/mean_length": 532.71484375,
"completions/mean_terminated_length": 534.803955078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 202.0,
"epoch": 0.176,
"grad_norm": 0.008074701763689518,
"learning_rate": 9.722222222222224e-07,
"loss": 0.0129,
"num_tokens": 32857394.0,
"reward": 1.4330260753631592,
"reward_std": 0.2663751542568207,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.7405972480773926,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.946222186088562,
"step": 165
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.6394679842967376,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.07301960784313707,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.09980370921889803,
"calib/mean_conf": 0.6305882352941177,
"calib/mu_c": 0.665421686746988,
"calib/mu_w": 0.56561797752809,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.02631372549019608,
"calib/std_conf": 0.11282837504162183,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1700.0,
"completions/max_terminated_length": 1700.0,
"completions/mean_length": 532.42578125,
"completions/mean_terminated_length": 534.5137329101562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 196.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.008140970021486282,
"learning_rate": 9.444444444444445e-07,
"loss": -0.0144,
"num_tokens": 33049447.0,
"reward": 1.583161473274231,
"reward_std": 0.2504641115665436,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/final_brier_reward_step": 0.801856279373169,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.939871072769165,
"step": 166
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6087347729789591,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.05496093749999998,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.07891196013289037,
"calib/mean_conf": 0.6298046875000001,
"calib/mu_c": 0.6556976744186046,
"calib/mu_w": 0.5767857142857142,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.006445312500000001,
"calib/std_conf": 0.12323900550567318,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1315.0,
"completions/max_terminated_length": 1315.0,
"completions/mean_length": 498.234375,
"completions/mean_terminated_length": 500.1882629394531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 187.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.007336960639804602,
"learning_rate": 9.166666666666666e-07,
"loss": -0.0083,
"num_tokens": 33232171.0,
"reward": 1.6025943756103516,
"reward_std": 0.23913165926933289,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.7973769903182983,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9281233549118042,
"step": 167
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5206031976744185,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.054563492063491995,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02935465116279068,
"calib/mean_conf": 0.6517857142857143,
"calib/mu_c": 0.6611046511627907,
"calib/mu_w": 0.63175,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.011904761904761906,
"calib/std_conf": 0.08414123681302946,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2670.0,
"completions/max_terminated_length": 2670.0,
"completions/mean_length": 556.69140625,
"completions/mean_terminated_length": 561.0748291015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.1792,
"grad_norm": 0.009717374108731747,
"learning_rate": 8.88888888888889e-07,
"loss": -0.0281,
"num_tokens": 33428924.0,
"reward": 1.580051064491272,
"reward_std": 0.311530739068985,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.772222638130188,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9116963744163513,
"step": 168
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5652173913043479,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.01580392156862746,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.041521739130434776,
"calib/mean_conf": 0.6550196078431374,
"calib/mu_c": 0.67,
"calib/mu_w": 0.6284782608695653,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.01580392156862746,
"calib/std_conf": 0.07417453984104336,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1551.0,
"completions/max_terminated_length": 1551.0,
"completions/mean_length": 525.1015625,
"completions/mean_terminated_length": 527.1608276367188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.00645485008135438,
"learning_rate": 8.611111111111112e-07,
"loss": 0.0181,
"num_tokens": 33617102.0,
"reward": 1.5582530498504639,
"reward_std": 0.18583500385284424,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/final_brier_reward_step": 0.779723048210144,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9313783049583435,
"step": 169
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5354248448778017,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.07203921568627457,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.02870203874889199,
"calib/mean_conf": 0.6524313725490197,
"calib/mu_c": 0.6643624161073827,
"calib/mu_w": 0.6356603773584907,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.07007843137254909,
"calib/std_conf": 0.08116051751928682,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1713.0,
"completions/max_terminated_length": 1713.0,
"completions/mean_length": 527.96875,
"completions/mean_terminated_length": 530.0392456054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.0113950464874506,
"learning_rate": 8.333333333333333e-07,
"loss": -0.011,
"num_tokens": 33805982.0,
"reward": 1.4930140972137451,
"reward_std": 0.2828036844730377,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.753375768661499,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9465547800064087,
"step": 170
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5311561561561562,
"calib/avg_num_step_conf": 1.05859375,
"calib/ece": 0.12414062500000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.013443443443443281,
"calib/mean_conf": 0.64203125,
"calib/mu_c": 0.6477027027027027,
"calib/mu_w": 0.6342592592592594,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.09402343750000014,
"calib/std_conf": 0.11789732937364401,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1158.0,
"completions/max_terminated_length": 1158.0,
"completions/mean_length": 486.09765625,
"completions/mean_terminated_length": 488.0039367675781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 119.0,
"epoch": 0.1824,
"grad_norm": 0.010750283487141132,
"learning_rate": 8.055555555555557e-07,
"loss": -0.0019,
"num_tokens": 33986887.0,
"reward": 1.4827287197113037,
"reward_std": 0.22716417908668518,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.7425246238708496,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9380537271499634,
"step": 171
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5929768880208333,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.11585937499999993,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0741666666666666,
"calib/mean_conf": 0.6400000000000001,
"calib/mu_c": 0.6585416666666667,
"calib/mu_w": 0.5843750000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0029296875,
"calib/std_conf": 0.10352762312542485,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1438.0,
"completions/max_terminated_length": 1438.0,
"completions/mean_length": 492.12890625,
"completions/mean_terminated_length": 494.058837890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 146.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.0088827945291996,
"learning_rate": 7.777777777777779e-07,
"loss": 0.0473,
"num_tokens": 34165792.0,
"reward": 1.6941030025482178,
"reward_std": 0.21069438755512238,
"rewards/accuracy_reward_step": 0.75,
"rewards/final_brier_reward_step": 0.8174945116043091,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9414230585098267,
"step": 172
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5189829083776969,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.06450980392156858,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.026133370692070668,
"calib/mean_conf": 0.6384705882352942,
"calib/mu_c": 0.6469767441860466,
"calib/mu_w": 0.6208433734939759,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.01423529411764706,
"calib/std_conf": 0.10817747816438932,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2060.0,
"completions/max_terminated_length": 2060.0,
"completions/mean_length": 570.3828125,
"completions/mean_terminated_length": 570.3828125,
"completions/min_length": 124.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.007619481533765793,
"learning_rate": 7.5e-07,
"loss": 0.0098,
"num_tokens": 34364538.0,
"reward": 1.5769091844558716,
"reward_std": 0.18090735375881195,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.775884747505188,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8730547428131104,
"step": 173
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5833974112520826,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.05586274509803936,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0673039215686273,
"calib/mean_conf": 0.6109215686274511,
"calib/mu_c": 0.6378431372549019,
"calib/mu_w": 0.5705392156862746,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.03339215686274525,
"calib/std_conf": 0.14042803517889893,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2777.0,
"completions/max_terminated_length": 2777.0,
"completions/mean_length": 583.55859375,
"completions/mean_terminated_length": 585.8471069335938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.1856,
"grad_norm": 0.011764385737478733,
"learning_rate": 7.222222222222222e-07,
"loss": 0.0101,
"num_tokens": 34567729.0,
"reward": 1.5053462982177734,
"reward_std": 0.30150508880615234,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.7672964334487915,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9055424928665161,
"step": 174
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5537757437070938,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.1930952380952381,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03488176964149503,
"calib/mean_conf": 0.6256349206349208,
"calib/mu_c": 0.6447368421052632,
"calib/mu_w": 0.6098550724637681,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.984375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.18317460317460318,
"calib/std_conf": 0.1256285532183025,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1485.0,
"completions/max_terminated_length": 1485.0,
"completions/mean_length": 530.48828125,
"completions/mean_terminated_length": 538.9087524414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 95.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.008950725197792053,
"learning_rate": 6.944444444444446e-07,
"loss": -0.015,
"num_tokens": 34758926.0,
"reward": 1.3267838954925537,
"reward_std": 0.2932436466217041,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.71073317527771,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.927856981754303,
"step": 175
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6392082149315422,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.03378906249999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.09797339188840093,
"calib/mean_conf": 0.5873046875,
"calib/mu_c": 0.6248101265822785,
"calib/mu_w": 0.5268367346938776,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.001953125000000001,
"calib/std_conf": 0.1554107490347027,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1722.0,
"completions/max_terminated_length": 1722.0,
"completions/mean_length": 542.42578125,
"completions/mean_terminated_length": 544.552978515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 167.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.009000916965305805,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0089,
"num_tokens": 34951419.0,
"reward": 1.5398871898651123,
"reward_std": 0.24812811613082886,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.7793495655059814,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.941474974155426,
"step": 176
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.49983406345413506,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.06494117647058828,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.001180804460374385,
"calib/mean_conf": 0.6324705882352941,
"calib/mu_c": 0.6329012345679013,
"calib/mu_w": 0.6317204301075269,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.031058823529411805,
"calib/std_conf": 0.11191284300323062,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1931.0,
"completions/max_terminated_length": 1931.0,
"completions/mean_length": 500.0859375,
"completions/mean_terminated_length": 502.0470886230469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 188.0,
"epoch": 0.1888,
"grad_norm": 0.008004814386367798,
"learning_rate": 6.388888888888889e-07,
"loss": 0.0152,
"num_tokens": 35132841.0,
"reward": 1.5370360612869263,
"reward_std": 0.1942150890827179,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.7533648610115051,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9148522615432739,
"step": 177
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5527701778385773,
"calib/avg_num_step_conf": 1.03125,
"calib/ece": 0.07167968749999984,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.04614637482900141,
"calib/mean_conf": 0.6130859375000001,
"calib/mu_c": 0.6285882352941178,
"calib/mu_w": 0.5824418604651164,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0103515625,
"calib/std_conf": 0.1351628374026903,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1578.0,
"completions/max_terminated_length": 1578.0,
"completions/mean_length": 516.81640625,
"completions/mean_terminated_length": 518.8431396484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 71.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.009003140032291412,
"learning_rate": 6.111111111111112e-07,
"loss": 0.0263,
"num_tokens": 35320786.0,
"reward": 1.5860583782196045,
"reward_std": 0.24094577133655548,
"rewards/accuracy_reward_step": 0.6640625,
"rewards/final_brier_reward_step": 0.7766379117965698,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9347081184387207,
"step": 178
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5568947906026558,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.011259842519684905,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.04089479060265577,
"calib/mean_conf": 0.6433070866141734,
"calib/mu_c": 0.6576363636363637,
"calib/mu_w": 0.6167415730337079,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.002480314960629923,
"calib/std_conf": 0.09975209522090753,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2807.0,
"completions/max_terminated_length": 2807.0,
"completions/mean_length": 546.37890625,
"completions/mean_terminated_length": 548.5216064453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 225.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.008436013013124466,
"learning_rate": 5.833333333333334e-07,
"loss": 0.0183,
"num_tokens": 35516491.0,
"reward": 1.5619540214538574,
"reward_std": 0.28580376505851746,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/final_brier_reward_step": 0.7749070525169373,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9292519092559814,
"step": 179
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.591525974025974,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.03720472440944883,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.06620000000000004,
"calib/mean_conf": 0.6389370078740159,
"calib/mu_c": 0.665,
"calib/mu_w": 0.5988,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.03492125984251969,
"calib/std_conf": 0.102791988415371,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2703.0,
"completions/max_terminated_length": 2703.0,
"completions/mean_length": 593.40625,
"completions/mean_terminated_length": 595.7333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 207.0,
"epoch": 0.192,
"grad_norm": 0.007525734603404999,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0181,
"num_tokens": 35721827.0,
"reward": 1.5143935680389404,
"reward_std": 0.21014289557933807,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.7734594345092773,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9184682965278625,
"step": 180
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5050963676797628,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.12420703125000007,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": -0.0004802322708179485,
"calib/mean_conf": 0.66584765625,
"calib/mu_c": 0.6656338028169014,
"calib/mu_w": 0.6661140350877194,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.11768359375000008,
"calib/std_conf": 0.059081348136546745,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1346.0,
"completions/max_terminated_length": 1346.0,
"completions/mean_length": 493.46875,
"completions/mean_terminated_length": 495.4039611816406,
"completions/min_length": 0.0,
"completions/min_terminated_length": 175.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.010766234248876572,
"learning_rate": 5.277777777777779e-07,
"loss": 0.0202,
"num_tokens": 35903987.0,
"reward": 1.4602396488189697,
"reward_std": 0.33089858293533325,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.7334253787994385,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.960045576095581,
"step": 181
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5239290495314592,
"calib/avg_num_step_conf": 1.0078125,
"calib/ece": 0.07320312500000015,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0030040160642570424,
"calib/mean_conf": 0.6382812500000001,
"calib/mu_c": 0.6393373493975905,
"calib/mu_w": 0.6363333333333334,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.031523437500000154,
"calib/std_conf": 0.11587715326343455,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1180.0,
"completions/max_terminated_length": 1180.0,
"completions/mean_length": 544.88671875,
"completions/mean_terminated_length": 547.0235595703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 185.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.007987387478351593,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0008,
"num_tokens": 36099206.0,
"reward": 1.5611926317214966,
"reward_std": 0.2433495819568634,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/final_brier_reward_step": 0.7598726749420166,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9312751889228821,
"step": 182
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5222813995820131,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.03771653543307099,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.01627856805770911,
"calib/mean_conf": 0.6422047244094489,
"calib/mu_c": 0.6480368098159509,
"calib/mu_w": 0.6317582417582418,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.019094488188976508,
"calib/std_conf": 0.0966397106704452,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2947.0,
"completions/max_terminated_length": 2947.0,
"completions/mean_length": 568.30859375,
"completions/mean_terminated_length": 568.30859375,
"completions/min_length": 222.0,
"completions/min_terminated_length": 222.0,
"epoch": 0.1952,
"grad_norm": 0.009835210628807545,
"learning_rate": 4.7222222222222226e-07,
"loss": 0.0282,
"num_tokens": 36300941.0,
"reward": 1.5496151447296143,
"reward_std": 0.25276634097099304,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/final_brier_reward_step": 0.7622320652008057,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9364967346191406,
"step": 183
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5460555732992654,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.04847656250000017,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.033758543596295154,
"calib/mean_conf": 0.6511328125,
"calib/mu_c": 0.6644516129032259,
"calib/mu_w": 0.6306930693069307,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.04707031250000017,
"calib/std_conf": 0.08471854791508082,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1210.0,
"completions/max_terminated_length": 1210.0,
"completions/mean_length": 526.40625,
"completions/mean_terminated_length": 528.4706420898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 172.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.009076988324522972,
"learning_rate": 4.444444444444445e-07,
"loss": -0.0007,
"num_tokens": 36490549.0,
"reward": 1.522108793258667,
"reward_std": 0.27043280005455017,
"rewards/accuracy_reward_step": 0.60546875,
"rewards/final_brier_reward_step": 0.767989456653595,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9305816888809204,
"step": 184
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6082545311268716,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.03281249999999993,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.08436826897819805,
"calib/mean_conf": 0.624453125,
"calib/mu_c": 0.6554320987654322,
"calib/mu_w": 0.5710638297872341,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0122265625,
"calib/std_conf": 0.12645903853712623,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1319.0,
"completions/max_terminated_length": 1319.0,
"completions/mean_length": 502.0078125,
"completions/mean_terminated_length": 503.97650146484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.009935007430613041,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0068,
"num_tokens": 36675551.0,
"reward": 1.5608875751495361,
"reward_std": 0.28984230756759644,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.7907851934432983,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9307299256324768,
"step": 185
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5613543091655266,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.03773437499999985,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.047255813953488324,
"calib/mean_conf": 0.6481250000000001,
"calib/mu_c": 0.664,
"calib/mu_w": 0.6167441860465117,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.0108984375,
"calib/std_conf": 0.08651002470812272,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1385.0,
"completions/max_terminated_length": 1385.0,
"completions/mean_length": 527.25390625,
"completions/mean_terminated_length": 529.3215942382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.1984,
"grad_norm": 0.007592697162181139,
"learning_rate": 3.8888888888888895e-07,
"loss": 0.0263,
"num_tokens": 36865136.0,
"reward": 1.5913572311401367,
"reward_std": 0.2117360234260559,
"rewards/accuracy_reward_step": 0.6640625,
"rewards/final_brier_reward_step": 0.7867816686630249,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9403030872344971,
"step": 186
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5813946177062375,
"calib/avg_num_step_conf": 1.015625,
"calib/ece": 0.07122047244094504,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.07018737424547283,
"calib/mean_conf": 0.6146850393700787,
"calib/mu_c": 0.6456338028169014,
"calib/mu_w": 0.5754464285714286,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.06342519685039386,
"calib/std_conf": 0.1372124739392378,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2159.0,
"completions/max_terminated_length": 2159.0,
"completions/mean_length": 571.64453125,
"completions/mean_terminated_length": 576.1456909179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.007867695763707161,
"learning_rate": 3.611111111111111e-07,
"loss": -0.0058,
"num_tokens": 37062589.0,
"reward": 1.4636740684509277,
"reward_std": 0.32446834444999695,
"rewards/accuracy_reward_step": 0.5546875,
"rewards/final_brier_reward_step": 0.7601839303970337,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9249536991119385,
"step": 187
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5852736518131941,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.019960474308300342,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0659346982470227,
"calib/mean_conf": 0.6384584980237155,
"calib/mu_c": 0.6629559748427675,
"calib/mu_w": 0.5970212765957448,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.014980237154150196,
"calib/std_conf": 0.11008977318084041,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2540.0,
"completions/max_terminated_length": 2540.0,
"completions/mean_length": 594.90625,
"completions/mean_terminated_length": 599.590576171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 138.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.009555697441101074,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0029,
"num_tokens": 37268525.0,
"reward": 1.5442224740982056,
"reward_std": 0.28254127502441406,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.775873064994812,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.9548314809799194,
"step": 188
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.533833141099577,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.05854901960784313,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.032549019607843066,
"calib/mean_conf": 0.6487450980392159,
"calib/mu_c": 0.6617647058823529,
"calib/mu_w": 0.6292156862745099,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.05364705882352941,
"calib/std_conf": 0.08995856195008704,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1387.0,
"completions/max_terminated_length": 1387.0,
"completions/mean_length": 512.140625,
"completions/mean_terminated_length": 514.1490478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 212.0,
"epoch": 0.2016,
"grad_norm": 0.011023632250726223,
"learning_rate": 3.055555555555556e-07,
"loss": -0.0082,
"num_tokens": 37456969.0,
"reward": 1.5090863704681396,
"reward_std": 0.28942179679870605,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.7621660232543945,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9260757565498352,
"step": 189
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5403963414634146,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.048710937500000134,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0273038176033934,
"calib/mean_conf": 0.6223828125,
"calib/mu_c": 0.6321951219512195,
"calib/mu_w": 0.6048913043478261,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.015234375000000133,
"calib/std_conf": 0.1250881952047828,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1537.0,
"completions/max_terminated_length": 1537.0,
"completions/mean_length": 566.47265625,
"completions/mean_terminated_length": 568.6941528320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 184.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.008933677338063717,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0068,
"num_tokens": 37657162.0,
"reward": 1.5577049255371094,
"reward_std": 0.24305100739002228,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.7642148733139038,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.944577693939209,
"step": 190
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.537377450980392,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.12867187500000016,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.0370588235294117,
"calib/mean_conf": 0.6321875,
"calib/mu_c": 0.6495588235294117,
"calib/mu_w": 0.6125,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.11480468750000017,
"calib/std_conf": 0.12020449697806652,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1497.0,
"completions/max_terminated_length": 1497.0,
"completions/mean_length": 518.5859375,
"completions/mean_terminated_length": 520.61962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 147.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.008770892396569252,
"learning_rate": 2.5000000000000004e-07,
"loss": -0.0015,
"num_tokens": 37843656.0,
"reward": 1.4288978576660156,
"reward_std": 0.27863526344299316,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.7413152456283569,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9126487374305725,
"step": 191
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6281150583244963,
"calib/avg_num_step_conf": 1.0,
"calib/ece": 0.06605468749999997,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.10552757158006365,
"calib/mean_conf": 0.6104296874999999,
"calib/mu_c": 0.6483536585365854,
"calib/mu_w": 0.5428260869565218,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0179296875,
"calib/std_conf": 0.14564582381123167,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2105.0,
"completions/max_terminated_length": 2105.0,
"completions/mean_length": 574.82421875,
"completions/mean_terminated_length": 577.0784912109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.2048,
"grad_norm": 0.009189280681312084,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0108,
"num_tokens": 38045355.0,
"reward": 1.5728929042816162,
"reward_std": 0.2598698139190674,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.796241044998169,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.9365895986557007,
"step": 192
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5541661430187256,
"calib/avg_num_step_conf": 0.99609375,
"calib/ece": 0.06607843137254915,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.05032047253990213,
"calib/mean_conf": 0.6386274509803922,
"calib/mu_c": 0.66013698630137,
"calib/mu_w": 0.6098165137614678,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.06607843137254915,
"calib/std_conf": 0.11078719413225781,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2171.0,
"completions/max_terminated_length": 2171.0,
"completions/mean_length": 549.984375,
"completions/mean_terminated_length": 552.1412353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 164.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.010155064053833485,
"learning_rate": 1.9444444444444447e-07,
"loss": 0.0631,
"num_tokens": 38241431.0,
"reward": 1.4826997518539429,
"reward_std": 0.34446632862091064,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.7602722644805908,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9336919784545898,
"step": 193
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5665559529892692,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.03239215686274509,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.05743484925907005,
"calib/mean_conf": 0.6262745098039216,
"calib/mu_c": 0.6494736842105264,
"calib/mu_w": 0.5920388349514564,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.03129411764705882,
"calib/std_conf": 0.12509072447727654,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1533.0,
"completions/max_terminated_length": 1533.0,
"completions/mean_length": 492.75390625,
"completions/mean_terminated_length": 494.6863098144531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 180.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.01012510061264038,
"learning_rate": 1.6666666666666668e-07,
"loss": -0.0096,
"num_tokens": 38423088.0,
"reward": 1.509757399559021,
"reward_std": 0.23923927545547485,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.7656105160713196,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9421834945678711,
"step": 194
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5831740570846075,
"calib/avg_num_step_conf": 0.984375,
"calib/ece": 0.07458498023715415,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.05845948012232416,
"calib/mean_conf": 0.6318972332015811,
"calib/mu_c": 0.6570833333333334,
"calib/mu_w": 0.5986238532110092,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.06865612648221345,
"calib/std_conf": 0.11559588405872054,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2545.0,
"completions/max_terminated_length": 2545.0,
"completions/mean_length": 501.42578125,
"completions/mean_terminated_length": 505.3740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 151.0,
"epoch": 0.208,
"grad_norm": 0.008480598218739033,
"learning_rate": 1.3888888888888888e-07,
"loss": -0.002,
"num_tokens": 38607005.0,
"reward": 1.4583773612976074,
"reward_std": 0.20689699053764343,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.7502175569534302,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.9065118432044983,
"step": 195
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5238853503184713,
"calib/avg_num_step_conf": 1.00390625,
"calib/ece": 0.0626274509803922,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.009576238138567561,
"calib/mean_conf": 0.6585490196078433,
"calib/mu_c": 0.6622292993630574,
"calib/mu_w": 0.6526530612244898,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.052745098039215725,
"calib/std_conf": 0.0748793313521476,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1311.0,
"completions/max_terminated_length": 1311.0,
"completions/mean_length": 422.578125,
"completions/mean_terminated_length": 424.2353210449219,
"completions/min_length": 0.0,
"completions/min_terminated_length": 155.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.010814669542014599,
"learning_rate": 1.1111111111111112e-07,
"loss": -0.0152,
"num_tokens": 38767297.0,
"reward": 1.524308204650879,
"reward_std": 0.20253178477287292,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.757500410079956,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.9337950944900513,
"step": 196
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5414506172839506,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.11517647058823531,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.03487037037037044,
"calib/mean_conf": 0.6186274509803922,
"calib/mu_c": 0.6350370370370372,
"calib/mu_w": 0.6001666666666667,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.10219607843137257,
"calib/std_conf": 0.13773830839452345,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 1680.0,
"completions/max_terminated_length": 1680.0,
"completions/mean_length": 563.4453125,
"completions/mean_terminated_length": 565.6549072265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.009498359635472298,
"learning_rate": 8.333333333333334e-08,
"loss": -0.0003,
"num_tokens": 38966163.0,
"reward": 1.4292974472045898,
"reward_std": 0.3322080075740814,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.736260175704956,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9446693062782288,
"step": 197
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5260466602748483,
"calib/avg_num_step_conf": 0.9921875,
"calib/ece": 0.08169291338582693,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.022558644934483896,
"calib/mean_conf": 0.6216141732283467,
"calib/mu_c": 0.6309395973154364,
"calib/mu_w": 0.6083809523809525,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.05834645669291356,
"calib/std_conf": 0.12935086552973982,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 3043.0,
"completions/max_terminated_length": 3043.0,
"completions/mean_length": 482.859375,
"completions/mean_terminated_length": 484.7529602050781,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.2112,
"grad_norm": 0.010535329580307007,
"learning_rate": 5.555555555555556e-08,
"loss": 0.003,
"num_tokens": 39144727.0,
"reward": 1.4822851419448853,
"reward_std": 0.20785585045814514,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.744623064994812,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.9211447238922119,
"step": 198
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5248013154288846,
"calib/avg_num_step_conf": 0.98828125,
"calib/ece": 0.010434782608695634,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.025798163880515368,
"calib/mean_conf": 0.6496442687747036,
"calib/mu_c": 0.658719512195122,
"calib/mu_w": 0.6329213483146067,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.005928853754940711,
"calib/std_conf": 0.08768583843255115,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2623.0,
"completions/max_terminated_length": 2623.0,
"completions/mean_length": 543.15625,
"completions/mean_terminated_length": 545.2863159179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 171.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.011450027115643024,
"learning_rate": 2.777777777777778e-08,
"loss": -0.0003,
"num_tokens": 39337543.0,
"reward": 1.5476951599121094,
"reward_std": 0.2952195107936859,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.7634691596031189,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.9200922846794128,
"step": 199
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.6058044519582981,
"calib/avg_num_step_conf": 0.98046875,
"calib/ece": 0.059288537549407,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.0,
"calib/gap": 0.07738165680473374,
"calib/mean_conf": 0.6241897233201582,
"calib/mu_c": 0.6498816568047338,
"calib/mu_w": 0.5725,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.0077470355731225305,
"calib/std_conf": 0.13392757899749022,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2830.0,
"completions/max_terminated_length": 2830.0,
"completions/mean_length": 535.6171875,
"completions/mean_terminated_length": 539.8346557617188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 124.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.007556810975074768,
"learning_rate": 0.0,
"loss": -0.0125,
"num_tokens": 39532277.0,
"reward": 1.5712368488311768,
"reward_std": 0.19881883263587952,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/final_brier_reward_step": 0.7790929675102234,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.909574031829834,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": 0.008669512395572383,
"train_runtime": 8904.7917,
"train_samples_per_second": 5.75,
"train_steps_per_second": 0.022
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 39532277,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}