Files
GSPO-7B-v5-main/trainer_state.json
ModelHub XC 4b24731c58 初始化项目,由ModelHub XC社区提供模型
Model: zhaohq/GSPO-7B-v5-main
Source: Original Platform
2026-05-30 11:56:07 +08:00

8643 lines
335 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.21333333333333335,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.75,
"calib/avg_num_step_conf": 0.359375,
"calib/ece": 0.5285714285714285,
"calib/final_conf_rate": 0.0546875,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.9285714285714286,
"calib/gap": 0.02833333333333321,
"calib/mean_conf": 0.9571428571428572,
"calib/mu_c": 0.9733333333333333,
"calib/mu_w": 0.9450000000000001,
"calib/nonempty_final_conf_rate": 0.0546875,
"calib/nonempty_reasoning_rate": 0.078125,
"calib/nonempty_step_conf_rate": 0.05859375,
"calib/pce": 0.5285714285714285,
"calib/std_conf": 0.033896601479156206,
"calib/step_conf_rate": 0.05859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 2853.0,
"completions/max_terminated_length": 2853.0,
"completions/mean_length": 658.8203125,
"completions/mean_terminated_length": 714.6525268554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0010666666666666667,
"grad_norm": 0.09998760372400284,
"learning_rate": 2.5000000000000004e-07,
"loss": 0.0145,
"num_tokens": 276242.0,
"reward": 0.07658073306083679,
"reward_std": 0.14345498383045197,
"rewards/accuracy_reward_step": 0.02734375,
"rewards/final_brier_reward_step": 0.024793751537799835,
"rewards/format_reward_step": 0.04296875,
"rewards/stepwise_brier_reward": 0.03152916580438614,
"step": 1
},
{
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.1851851851851852,
"calib/avg_num_step_conf": 0.24609375,
"calib/ece": 0.2141666666666665,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.01666666666666672,
"calib/mean_conf": 0.9641666666666665,
"calib/mu_c": 0.9599999999999999,
"calib/mu_w": 0.9766666666666666,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.0546875,
"calib/pce": 0.2141666666666665,
"calib/std_conf": 0.014409680388158833,
"calib/step_conf_rate": 0.0546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 2927.0,
"completions/max_terminated_length": 2927.0,
"completions/mean_length": 749.54296875,
"completions/mean_terminated_length": 820.0128784179688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0021333333333333334,
"grad_norm": 0.0952862948179245,
"learning_rate": 5.000000000000001e-07,
"loss": -0.0143,
"num_tokens": 571413.0,
"reward": 0.09923964738845825,
"reward_std": 0.2225067913532257,
"rewards/accuracy_reward_step": 0.0390625,
"rewards/final_brier_reward_step": 0.03563320264220238,
"rewards/format_reward_step": 0.046875,
"rewards/stepwise_brier_reward": 0.03320039063692093,
"step": 2
},
{
"calib/answer_extract_rate": 0.02734375,
"calib/auroc": 0.4444444444444445,
"calib/avg_num_step_conf": 0.19140625,
"calib/ece": 0.46833333333333327,
"calib/final_conf_rate": 0.0234375,
"calib/format_rate": 0.01953125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0033333333333334103,
"calib/mean_conf": 0.9683333333333333,
"calib/mu_c": 0.9666666666666667,
"calib/mu_w": 0.9700000000000001,
"calib/nonempty_final_conf_rate": 0.0234375,
"calib/nonempty_reasoning_rate": 0.04296875,
"calib/nonempty_step_conf_rate": 0.03515625,
"calib/pce": 0.46833333333333327,
"calib/std_conf": 0.01343709624716426,
"calib/step_conf_rate": 0.03515625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 2791.0,
"completions/max_terminated_length": 2791.0,
"completions/mean_length": 676.359375,
"completions/mean_terminated_length": 736.7999877929688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0032,
"grad_norm": 0.05166647955775261,
"learning_rate": 7.5e-07,
"loss": -0.0104,
"num_tokens": 849817.0,
"reward": 0.03318578749895096,
"reward_std": 0.08771081268787384,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.012013280764222145,
"rewards/format_reward_step": 0.01953125,
"rewards/stepwise_brier_reward": 0.011354869231581688,
"step": 3
},
{
"calib/answer_extract_rate": 0.04296875,
"calib/auroc": 0.5857142857142856,
"calib/avg_num_step_conf": 0.29296875,
"calib/ece": 0.3483333333333334,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.0390625,
"calib/frac_conf_gt_0.9": 0.8333333333333334,
"calib/gap": 0.04057142857142848,
"calib/mean_conf": 0.9316666666666666,
"calib/mu_c": 0.9485714285714285,
"calib/mu_w": 0.908,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.05078125,
"calib/nonempty_step_conf_rate": 0.046875,
"calib/pce": 0.3483333333333334,
"calib/std_conf": 0.07525881269917091,
"calib/step_conf_rate": 0.046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 3030.0,
"completions/max_terminated_length": 3030.0,
"completions/mean_length": 731.8671875,
"completions/mean_terminated_length": 836.419677734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.004266666666666667,
"grad_norm": 0.015893759205937386,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0075,
"num_tokens": 1143343.0,
"reward": 0.0737013965845108,
"reward_std": 0.1637091338634491,
"rewards/accuracy_reward_step": 0.02734375,
"rewards/final_brier_reward_step": 0.026279686018824577,
"rewards/format_reward_step": 0.0390625,
"rewards/stepwise_brier_reward": 0.026338398456573486,
"step": 4
},
{
"calib/answer_extract_rate": 0.03125,
"calib/auroc": 0.375,
"calib/avg_num_step_conf": 0.125,
"calib/ece": 0.5357142857142858,
"calib/final_conf_rate": 0.02734375,
"calib/format_rate": 0.02734375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.007499999999999951,
"calib/mean_conf": 0.9642857142857143,
"calib/mu_c": 0.96,
"calib/mu_w": 0.9674999999999999,
"calib/nonempty_final_conf_rate": 0.02734375,
"calib/nonempty_reasoning_rate": 0.03125,
"calib/nonempty_step_conf_rate": 0.02734375,
"calib/pce": 0.5357142857142858,
"calib/std_conf": 0.019897697538834472,
"calib/step_conf_rate": 0.02734375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09765625,
"completions/max_length": 3007.0,
"completions/max_terminated_length": 3007.0,
"completions/mean_length": 741.08984375,
"completions/mean_terminated_length": 821.2943725585938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.005333333333333333,
"grad_norm": 0.011378278955817223,
"learning_rate": 1.25e-06,
"loss": -0.0188,
"num_tokens": 1439750.0,
"reward": 0.03778243437409401,
"reward_std": 0.1001213788986206,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.012688672170042992,
"rewards/format_reward_step": 0.02734375,
"rewards/stepwise_brier_reward": 0.013441067188978195,
"step": 5
},
{
"calib/answer_extract_rate": 0.05859375,
"calib/auroc": 0.4571428571428572,
"calib/avg_num_step_conf": 0.1875,
"calib/ece": 0.5433333333333332,
"calib/final_conf_rate": 0.046875,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.9166666666666666,
"calib/gap": -0.013714285714285679,
"calib/mean_conf": 0.96,
"calib/mu_c": 0.952,
"calib/mu_w": 0.9657142857142856,
"calib/nonempty_final_conf_rate": 0.046875,
"calib/nonempty_reasoning_rate": 0.0703125,
"calib/nonempty_step_conf_rate": 0.04296875,
"calib/pce": 0.5433333333333332,
"calib/std_conf": 0.03135814620371129,
"calib/step_conf_rate": 0.04296875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 2990.0,
"completions/max_terminated_length": 2990.0,
"completions/mean_length": 626.26953125,
"completions/mean_terminated_length": 691.0560302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0064,
"grad_norm": 0.016327349469065666,
"learning_rate": 1.5e-06,
"loss": -0.0036,
"num_tokens": 1706027.0,
"reward": 0.05387546867132187,
"reward_std": 0.10294744372367859,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.016544923186302185,
"rewards/format_reward_step": 0.03125,
"rewards/stepwise_brier_reward": 0.01926945522427559,
"step": 6
},
{
"calib/answer_extract_rate": 0.05078125,
"calib/auroc": 0.32,
"calib/avg_num_step_conf": 0.26171875,
"calib/ece": 0.594,
"calib/final_conf_rate": 0.05859375,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 0.9333333333333333,
"calib/gap": 0.04599999999999993,
"calib/mean_conf": 0.9273333333333333,
"calib/mu_c": 0.9579999999999999,
"calib/mu_w": 0.9119999999999999,
"calib/nonempty_final_conf_rate": 0.05859375,
"calib/nonempty_reasoning_rate": 0.0546875,
"calib/nonempty_step_conf_rate": 0.05078125,
"calib/pce": 0.594,
"calib/std_conf": 0.1610576156397317,
"calib/step_conf_rate": 0.05078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0859375,
"completions/max_length": 3069.0,
"completions/max_terminated_length": 3069.0,
"completions/mean_length": 796.58203125,
"completions/mean_terminated_length": 871.4744262695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.007466666666666667,
"grad_norm": 0.014986117370426655,
"learning_rate": 1.75e-06,
"loss": 0.0125,
"num_tokens": 2017376.0,
"reward": 0.06252811849117279,
"reward_std": 0.15057691931724548,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.02071210741996765,
"rewards/format_reward_step": 0.04296875,
"rewards/stepwise_brier_reward": 0.026275351643562317,
"step": 7
},
{
"calib/answer_extract_rate": 0.046875,
"calib/auroc": 0.4,
"calib/avg_num_step_conf": 0.1328125,
"calib/ece": 0.4640000000000001,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.02734375,
"calib/frac_conf_gt_0.9": 0.9,
"calib/gap": -0.0040000000000000036,
"calib/mean_conf": 0.9640000000000001,
"calib/mu_c": 0.962,
"calib/mu_w": 0.966,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.05078125,
"calib/nonempty_step_conf_rate": 0.03515625,
"calib/pce": 0.4640000000000001,
"calib/std_conf": 0.02870540018881465,
"calib/step_conf_rate": 0.03515625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0625,
"completions/max_length": 3057.0,
"completions/max_terminated_length": 3057.0,
"completions/mean_length": 675.36328125,
"completions/mean_terminated_length": 720.3875122070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.008533333333333334,
"grad_norm": 0.009805521927773952,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0075,
"num_tokens": 2296781.0,
"reward": 0.053026750683784485,
"reward_std": 0.12672922015190125,
"rewards/accuracy_reward_step": 0.01953125,
"rewards/final_brier_reward_step": 0.02003437466919422,
"rewards/format_reward_step": 0.02734375,
"rewards/stepwise_brier_reward": 0.02019762247800827,
"step": 8
},
{
"calib/answer_extract_rate": 0.03125,
"calib/auroc": 0.16666666666666663,
"calib/avg_num_step_conf": 0.12890625,
"calib/ece": 0.5988888888888889,
"calib/final_conf_rate": 0.03515625,
"calib/format_rate": 0.0234375,
"calib/frac_conf_gt_0.9": 0.7777777777777778,
"calib/gap": -0.03833333333333333,
"calib/mean_conf": 0.9322222222222223,
"calib/mu_c": 0.9066666666666666,
"calib/mu_w": 0.945,
"calib/nonempty_final_conf_rate": 0.03515625,
"calib/nonempty_reasoning_rate": 0.0390625,
"calib/nonempty_step_conf_rate": 0.03125,
"calib/pce": 0.5988888888888889,
"calib/std_conf": 0.06196374886043859,
"calib/step_conf_rate": 0.03125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2990.0,
"completions/max_terminated_length": 2990.0,
"completions/mean_length": 663.30859375,
"completions/mean_terminated_length": 738.291259765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 4.0,
"epoch": 0.0096,
"grad_norm": 0.045146096497774124,
"learning_rate": 2.25e-06,
"loss": 0.0096,
"num_tokens": 2574124.0,
"reward": 0.03620094433426857,
"reward_std": 0.09545932710170746,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.013463281095027924,
"rewards/format_reward_step": 0.0234375,
"rewards/stepwise_brier_reward": 0.014152996242046356,
"step": 9
},
{
"calib/answer_extract_rate": 0.05859375,
"calib/auroc": 0.5625,
"calib/avg_num_step_conf": 0.21484375,
"calib/ece": 0.7539999999999999,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.03125,
"calib/frac_conf_gt_0.9": 0.9,
"calib/gap": 0.020000000000000018,
"calib/mean_conf": 0.954,
"calib/mu_c": 0.97,
"calib/mu_w": 0.95,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.06640625,
"calib/nonempty_step_conf_rate": 0.0390625,
"calib/pce": 0.7539999999999999,
"calib/std_conf": 0.03611094017053558,
"calib/step_conf_rate": 0.0390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1171875,
"completions/max_length": 2949.0,
"completions/max_terminated_length": 2949.0,
"completions/mean_length": 685.203125,
"completions/mean_terminated_length": 776.1593017578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 3.0,
"epoch": 0.010666666666666666,
"grad_norm": 0.008990222588181496,
"learning_rate": 2.5e-06,
"loss": -0.0291,
"num_tokens": 2856336.0,
"reward": 0.033995941281318665,
"reward_std": 0.07464214414358139,
"rewards/accuracy_reward_step": 0.0078125,
"rewards/final_brier_reward_step": 0.009489063173532486,
"rewards/format_reward_step": 0.03125,
"rewards/stepwise_brier_reward": 0.01711970567703247,
"step": 10
},
{
"calib/answer_extract_rate": 0.04296875,
"calib/auroc": 0.4761904761904762,
"calib/avg_num_step_conf": 0.23828125,
"calib/ece": 0.6400000000000001,
"calib/final_conf_rate": 0.0390625,
"calib/format_rate": 0.03515625,
"calib/frac_conf_gt_0.9": 0.8,
"calib/gap": 0.009523809523809601,
"calib/mean_conf": 0.9400000000000001,
"calib/mu_c": 0.9466666666666667,
"calib/mu_w": 0.937142857142857,
"calib/nonempty_final_conf_rate": 0.0390625,
"calib/nonempty_reasoning_rate": 0.05078125,
"calib/nonempty_step_conf_rate": 0.04296875,
"calib/pce": 0.6400000000000001,
"calib/std_conf": 0.0679705818718657,
"calib/step_conf_rate": 0.04296875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.12109375,
"completions/max_length": 3068.0,
"completions/max_terminated_length": 3068.0,
"completions/mean_length": 726.953125,
"completions/mean_terminated_length": 827.1111450195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 13.0,
"epoch": 0.011733333333333333,
"grad_norm": 0.015153302811086178,
"learning_rate": 2.7500000000000004e-06,
"loss": 0.0004,
"num_tokens": 3146916.0,
"reward": 0.04430060833692551,
"reward_std": 0.1161235123872757,
"rewards/accuracy_reward_step": 0.01171875,
"rewards/final_brier_reward_step": 0.01468046847730875,
"rewards/format_reward_step": 0.03515625,
"rewards/stepwise_brier_reward": 0.021896956488490105,
"step": 11
},
{
"calib/answer_extract_rate": 0.0703125,
"calib/auroc": 0.47727272727272724,
"calib/avg_num_step_conf": 0.3828125,
"calib/ece": 0.6117647058823528,
"calib/final_conf_rate": 0.06640625,
"calib/format_rate": 0.04296875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0021212121212120794,
"calib/mean_conf": 0.9647058823529411,
"calib/mu_c": 0.9633333333333333,
"calib/mu_w": 0.9654545454545453,
"calib/nonempty_final_conf_rate": 0.06640625,
"calib/nonempty_reasoning_rate": 0.08203125,
"calib/nonempty_step_conf_rate": 0.0703125,
"calib/pce": 0.6117647058823528,
"calib/std_conf": 0.020896846668625433,
"calib/step_conf_rate": 0.0703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08203125,
"completions/max_length": 3026.0,
"completions/max_terminated_length": 3026.0,
"completions/mean_length": 675.05859375,
"completions/mean_terminated_length": 735.3829345703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.0128,
"grad_norm": 0.014090793207287788,
"learning_rate": 3e-06,
"loss": 0.0051,
"num_tokens": 3423907.0,
"reward": 0.07500467449426651,
"reward_std": 0.17258451879024506,
"rewards/accuracy_reward_step": 0.02734375,
"rewards/final_brier_reward_step": 0.021091407164931297,
"rewards/format_reward_step": 0.04296875,
"rewards/stepwise_brier_reward": 0.028927277773618698,
"step": 12
},
{
"calib/answer_extract_rate": 0.06640625,
"calib/auroc": 0.5303030303030303,
"calib/avg_num_step_conf": 0.3984375,
"calib/ece": 0.3279411764705882,
"calib/final_conf_rate": 0.06640625,
"calib/format_rate": 0.0546875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0038636363636362914,
"calib/mean_conf": 0.975,
"calib/mu_c": 0.9763636363636364,
"calib/mu_w": 0.9725000000000001,
"calib/nonempty_final_conf_rate": 0.06640625,
"calib/nonempty_reasoning_rate": 0.078125,
"calib/nonempty_step_conf_rate": 0.0703125,
"calib/pce": 0.3279411764705882,
"calib/std_conf": 0.01680336100833613,
"calib/step_conf_rate": 0.0703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 3001.0,
"completions/max_terminated_length": 3001.0,
"completions/mean_length": 767.99609375,
"completions/mean_terminated_length": 843.806884765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.013866666666666666,
"grad_norm": 0.01971305161714554,
"learning_rate": 3.2500000000000002e-06,
"loss": -0.0259,
"num_tokens": 3725106.0,
"reward": 0.10816285759210587,
"reward_std": 0.20174898207187653,
"rewards/accuracy_reward_step": 0.04296875,
"rewards/final_brier_reward_step": 0.03248114883899689,
"rewards/format_reward_step": 0.0546875,
"rewards/stepwise_brier_reward": 0.03298277035355568,
"step": 13
},
{
"calib/answer_extract_rate": 0.09375,
"calib/auroc": 0.6192307692307693,
"calib/avg_num_step_conf": 0.4375,
"calib/ece": 0.5260869565217392,
"calib/final_conf_rate": 0.08984375,
"calib/format_rate": 0.07421875,
"calib/frac_conf_gt_0.9": 0.9565217391304348,
"calib/gap": 0.009076923076922983,
"calib/mean_conf": 0.9608695652173914,
"calib/mu_c": 0.966,
"calib/mu_w": 0.956923076923077,
"calib/nonempty_final_conf_rate": 0.08984375,
"calib/nonempty_reasoning_rate": 0.09765625,
"calib/nonempty_step_conf_rate": 0.08984375,
"calib/pce": 0.5260869565217392,
"calib/std_conf": 0.025179882148817844,
"calib/step_conf_rate": 0.08984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 3064.0,
"completions/max_terminated_length": 3064.0,
"completions/mean_length": 835.2421875,
"completions/mean_terminated_length": 906.0254516601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.014933333333333333,
"grad_norm": 0.020166773349046707,
"learning_rate": 3.5e-06,
"loss": -0.0202,
"num_tokens": 4044328.0,
"reward": 0.11651969701051712,
"reward_std": 0.2357882559299469,
"rewards/accuracy_reward_step": 0.0390625,
"rewards/final_brier_reward_step": 0.031532421708106995,
"rewards/format_reward_step": 0.07421875,
"rewards/stepwise_brier_reward": 0.05173388123512268,
"step": 14
},
{
"calib/answer_extract_rate": 0.12109375,
"calib/auroc": 0.45714285714285713,
"calib/avg_num_step_conf": 0.6484375,
"calib/ece": 0.456896551724138,
"calib/final_conf_rate": 0.11328125,
"calib/format_rate": 0.09765625,
"calib/frac_conf_gt_0.9": 0.896551724137931,
"calib/gap": 0.03795238095238118,
"calib/mean_conf": 0.9396551724137929,
"calib/mu_c": 0.9592857142857144,
"calib/mu_w": 0.9213333333333332,
"calib/nonempty_final_conf_rate": 0.11328125,
"calib/nonempty_reasoning_rate": 0.1328125,
"calib/nonempty_step_conf_rate": 0.1171875,
"calib/pce": 0.456896551724138,
"calib/std_conf": 0.12411015014815611,
"calib/step_conf_rate": 0.1171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.08984375,
"completions/max_length": 2925.0,
"completions/max_terminated_length": 2925.0,
"completions/mean_length": 692.48828125,
"completions/mean_terminated_length": 760.8455200195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.021689649671316147,
"learning_rate": 3.7500000000000005e-06,
"loss": -0.0017,
"num_tokens": 4329485.0,
"reward": 0.16236014664173126,
"reward_std": 0.3670760989189148,
"rewards/accuracy_reward_step": 0.0546875,
"rewards/final_brier_reward_step": 0.06107109412550926,
"rewards/format_reward_step": 0.09765625,
"rewards/stepwise_brier_reward": 0.06493200361728668,
"step": 15
},
{
"calib/answer_extract_rate": 0.15625,
"calib/auroc": 0.4028132992327365,
"calib/avg_num_step_conf": 0.91015625,
"calib/ece": 0.5319749999999999,
"calib/final_conf_rate": 0.15625,
"calib/format_rate": 0.1171875,
"calib/frac_conf_gt_0.9": 0.825,
"calib/gap": -0.007117647058823562,
"calib/mean_conf": 0.9569749999999999,
"calib/mu_c": 0.9528823529411764,
"calib/mu_w": 0.96,
"calib/nonempty_final_conf_rate": 0.15625,
"calib/nonempty_reasoning_rate": 0.16796875,
"calib/nonempty_step_conf_rate": 0.140625,
"calib/pce": 0.5319749999999999,
"calib/std_conf": 0.03168397662857362,
"calib/step_conf_rate": 0.140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 3020.0,
"completions/max_terminated_length": 3020.0,
"completions/mean_length": 816.5859375,
"completions/mean_terminated_length": 901.0603637695312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.017066666666666667,
"grad_norm": 0.014984040521085262,
"learning_rate": 4.000000000000001e-06,
"loss": -0.0152,
"num_tokens": 4647379.0,
"reward": 0.19561436772346497,
"reward_std": 0.37104684114456177,
"rewards/accuracy_reward_step": 0.06640625,
"rewards/final_brier_reward_step": 0.061235152184963226,
"rewards/format_reward_step": 0.1171875,
"rewards/stepwise_brier_reward": 0.08840983361005783,
"step": 16
},
{
"calib/answer_extract_rate": 0.25,
"calib/auroc": 0.4502032520325203,
"calib/avg_num_step_conf": 1.46484375,
"calib/ece": 0.32492307692307676,
"calib/final_conf_rate": 0.25390625,
"calib/format_rate": 0.2109375,
"calib/frac_conf_gt_0.9": 0.8153846153846154,
"calib/gap": -0.0071341463414634765,
"calib/mean_conf": 0.948,
"calib/mu_c": 0.9453658536585365,
"calib/mu_w": 0.9525,
"calib/nonempty_final_conf_rate": 0.25390625,
"calib/nonempty_reasoning_rate": 0.26953125,
"calib/nonempty_step_conf_rate": 0.24609375,
"calib/pce": 0.3210769230769229,
"calib/std_conf": 0.04184954737371259,
"calib/step_conf_rate": 0.24609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.04296875,
"completions/max_length": 2723.0,
"completions/max_terminated_length": 2723.0,
"completions/mean_length": 699.859375,
"completions/mean_terminated_length": 731.2816162109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 5.0,
"epoch": 0.018133333333333335,
"grad_norm": 0.025489211082458496,
"learning_rate": 4.25e-06,
"loss": 0.0229,
"num_tokens": 4930071.0,
"reward": 0.4290560483932495,
"reward_std": 0.5249615907669067,
"rewards/accuracy_reward_step": 0.1640625,
"rewards/final_brier_reward_step": 0.15336796641349792,
"rewards/format_reward_step": 0.2109375,
"rewards/stepwise_brier_reward": 0.15660616755485535,
"step": 17
},
{
"calib/answer_extract_rate": 0.328125,
"calib/auroc": 0.4468085106382979,
"calib/avg_num_step_conf": 2.16796875,
"calib/ece": 0.3682499999999999,
"calib/final_conf_rate": 0.3125,
"calib/format_rate": 0.27734375,
"calib/frac_conf_gt_0.9": 0.8875,
"calib/gap": 0.002050290135396593,
"calib/mean_conf": 0.9557500000000001,
"calib/mu_c": 0.9565957446808512,
"calib/mu_w": 0.9545454545454546,
"calib/nonempty_final_conf_rate": 0.3125,
"calib/nonempty_reasoning_rate": 0.359375,
"calib/nonempty_step_conf_rate": 0.3203125,
"calib/pce": 0.3682499999999999,
"calib/std_conf": 0.04366849550877611,
"calib/step_conf_rate": 0.3203125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.06640625,
"completions/max_length": 3031.0,
"completions/max_terminated_length": 3031.0,
"completions/mean_length": 676.234375,
"completions/mean_terminated_length": 724.334716796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 1.0,
"epoch": 0.0192,
"grad_norm": 0.037937141954898834,
"learning_rate": 4.5e-06,
"loss": -0.0288,
"num_tokens": 5213907.0,
"reward": 0.5089144110679626,
"reward_std": 0.6876594424247742,
"rewards/accuracy_reward_step": 0.1875,
"rewards/final_brier_reward_step": 0.17252811789512634,
"rewards/format_reward_step": 0.27734375,
"rewards/stepwise_brier_reward": 0.18344208598136902,
"step": 18
},
{
"calib/answer_extract_rate": 0.66796875,
"calib/auroc": 0.5192307692307692,
"calib/avg_num_step_conf": 4.5234375,
"calib/ece": 0.48286144578313256,
"calib/final_conf_rate": 0.6484375,
"calib/format_rate": 0.578125,
"calib/frac_conf_gt_0.9": 0.9156626506024096,
"calib/gap": -0.002026515151515196,
"calib/mean_conf": 0.9527409638554215,
"calib/mu_c": 0.9516666666666665,
"calib/mu_w": 0.9536931818181817,
"calib/nonempty_final_conf_rate": 0.6484375,
"calib/nonempty_reasoning_rate": 0.7265625,
"calib/nonempty_step_conf_rate": 0.66796875,
"calib/pce": 0.48286144578313256,
"calib/std_conf": 0.037823134809026944,
"calib/step_conf_rate": 0.66796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2904.0,
"completions/max_terminated_length": 2904.0,
"completions/mean_length": 550.953125,
"completions/mean_terminated_length": 566.4417724609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 6.0,
"epoch": 0.020266666666666665,
"grad_norm": 0.03861883282661438,
"learning_rate": 4.75e-06,
"loss": -0.0044,
"num_tokens": 5459711.0,
"reward": 0.9206970930099487,
"reward_std": 0.7485260367393494,
"rewards/accuracy_reward_step": 0.3046875,
"rewards/final_brier_reward_step": 0.3020592927932739,
"rewards/format_reward_step": 0.578125,
"rewards/stepwise_brier_reward": 0.3963542580604553,
"step": 19
},
{
"calib/answer_extract_rate": 0.88671875,
"calib/auroc": 0.4868912337662337,
"calib/avg_num_step_conf": 6.06640625,
"calib/ece": 0.4664549549549549,
"calib/final_conf_rate": 0.8671875,
"calib/format_rate": 0.8203125,
"calib/frac_conf_gt_0.9": 0.9234234234234234,
"calib/gap": -0.004156655844156032,
"calib/mean_conf": 0.9558243243243243,
"calib/mu_c": 0.9537272727272725,
"calib/mu_w": 0.9578839285714286,
"calib/nonempty_final_conf_rate": 0.8671875,
"calib/nonempty_reasoning_rate": 0.94921875,
"calib/nonempty_step_conf_rate": 0.91796875,
"calib/pce": 0.46339189189189184,
"calib/std_conf": 0.04347548489327777,
"calib/step_conf_rate": 0.91796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2736.0,
"completions/max_terminated_length": 2736.0,
"completions/mean_length": 461.83984375,
"completions/mean_terminated_length": 467.31622314453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 39.0,
"epoch": 0.021333333333333333,
"grad_norm": 0.021412760019302368,
"learning_rate": 5e-06,
"loss": -0.0089,
"num_tokens": 5682814.0,
"reward": 1.3245768547058105,
"reward_std": 0.7944626808166504,
"rewards/accuracy_reward_step": 0.4375,
"rewards/final_brier_reward_step": 0.45545026659965515,
"rewards/format_reward_step": 0.8203125,
"rewards/stepwise_brier_reward": 0.5772321224212646,
"step": 20
},
{
"calib/answer_extract_rate": 0.92578125,
"calib/auroc": 0.5046546546546546,
"calib/avg_num_step_conf": 6.984375,
"calib/ece": 0.4655580086580087,
"calib/final_conf_rate": 0.90234375,
"calib/format_rate": 0.859375,
"calib/frac_conf_gt_0.9": 0.8831168831168831,
"calib/gap": 0.009543468468468652,
"calib/mean_conf": 0.9460774891774892,
"calib/mu_c": 0.9510351351351353,
"calib/mu_w": 0.9414916666666666,
"calib/nonempty_final_conf_rate": 0.90234375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.953125,
"calib/pce": 0.4655580086580087,
"calib/std_conf": 0.08062289489701147,
"calib/step_conf_rate": 0.953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 3018.0,
"completions/max_terminated_length": 3018.0,
"completions/mean_length": 496.0390625,
"completions/mean_terminated_length": 503.9127197265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 40.0,
"epoch": 0.0224,
"grad_norm": 0.025596238672733307,
"learning_rate": 4.9722222222222224e-06,
"loss": -0.03,
"num_tokens": 5912760.0,
"reward": 1.365824818611145,
"reward_std": 0.7561129331588745,
"rewards/accuracy_reward_step": 0.4453125,
"rewards/final_brier_reward_step": 0.46789538860321045,
"rewards/format_reward_step": 0.859375,
"rewards/stepwise_brier_reward": 0.6047787666320801,
"step": 21
},
{
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.5091642228739003,
"calib/avg_num_step_conf": 7.29296875,
"calib/ece": 0.4596489795918367,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.93359375,
"calib/frac_conf_gt_0.9": 0.8775510204081632,
"calib/gap": 0.0003416422287390031,
"calib/mean_conf": 0.9492816326530612,
"calib/mu_c": 0.9494545454545454,
"calib/mu_w": 0.9491129032258064,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.4575265306122449,
"calib/std_conf": 0.046251290874293714,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2794.0,
"completions/max_terminated_length": 2794.0,
"completions/mean_length": 480.875,
"completions/mean_terminated_length": 486.57708740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 8.0,
"epoch": 0.023466666666666667,
"grad_norm": 0.14732640981674194,
"learning_rate": 4.944444444444445e-06,
"loss": 0.019,
"num_tokens": 6137680.0,
"reward": 1.470207691192627,
"reward_std": 0.742310643196106,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.5073757171630859,
"rewards/format_reward_step": 0.93359375,
"rewards/stepwise_brier_reward": 0.6703301668167114,
"step": 22
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4517401992495796,
"calib/avg_num_step_conf": 7.375,
"calib/ece": 0.42261044176706813,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.8674698795180723,
"calib/gap": -0.008777979039979233,
"calib/mean_conf": 0.946144578313253,
"calib/mu_c": 0.9419847328244275,
"calib/mu_w": 0.9507627118644068,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.42132530120481915,
"calib/std_conf": 0.04665083474335586,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2701.0,
"completions/max_terminated_length": 2701.0,
"completions/mean_length": 478.09765625,
"completions/mean_terminated_length": 483.7668151855469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.024533333333333334,
"grad_norm": 0.012211649678647518,
"learning_rate": 4.9166666666666665e-06,
"loss": 0.0314,
"num_tokens": 6364009.0,
"reward": 1.54275381565094,
"reward_std": 0.727489173412323,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.5397515296936035,
"rewards/format_reward_step": 0.94921875,
"rewards/stepwise_brier_reward": 0.6625137329101562,
"step": 23
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.46217724583109204,
"calib/avg_num_step_conf": 7.7421875,
"calib/ece": 0.5186194331983806,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.94140625,
"calib/frac_conf_gt_0.9": 0.8218623481781376,
"calib/gap": -0.007405594405594518,
"calib/mean_conf": 0.9396720647773279,
"calib/mu_c": 0.9353846153846154,
"calib/mu_w": 0.9427902097902099,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.5186194331983806,
"calib/std_conf": 0.04596488878480891,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2213.0,
"completions/max_terminated_length": 2213.0,
"completions/mean_length": 493.14453125,
"completions/mean_terminated_length": 497.0275573730469,
"completions/min_length": 0.0,
"completions/min_terminated_length": 70.0,
"epoch": 0.0256,
"grad_norm": 0.010158343240618706,
"learning_rate": 4.888888888888889e-06,
"loss": -0.008,
"num_tokens": 6594766.0,
"reward": 1.3534126281738281,
"reward_std": 0.7272195816040039,
"rewards/accuracy_reward_step": 0.41015625,
"rewards/final_brier_reward_step": 0.4507281184196472,
"rewards/format_reward_step": 0.94140625,
"rewards/stepwise_brier_reward": 0.6191724538803101,
"step": 24
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5088761174968072,
"calib/avg_num_step_conf": 7.953125,
"calib/ece": 0.3985657370517929,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.8127490039840638,
"calib/gap": 0.0011871008939976502,
"calib/mean_conf": 0.936414342629482,
"calib/mu_c": 0.936962962962963,
"calib/mu_w": 0.9357758620689653,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3985657370517929,
"calib/std_conf": 0.04696242917959356,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2319.0,
"completions/max_terminated_length": 2319.0,
"completions/mean_length": 464.69921875,
"completions/mean_terminated_length": 470.2095031738281,
"completions/min_length": 0.0,
"completions/min_terminated_length": 81.0,
"epoch": 0.02666666666666667,
"grad_norm": 0.01881440542638302,
"learning_rate": 4.861111111111111e-06,
"loss": 0.0143,
"num_tokens": 6816953.0,
"reward": 1.5973844528198242,
"reward_std": 0.6234603524208069,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.5789656043052673,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.6933847665786743,
"step": 25
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.40984695168226665,
"calib/avg_num_step_conf": 7.50390625,
"calib/ece": 0.39682539682539697,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.7261904761904762,
"calib/gap": -0.015529977232481729,
"calib/mean_conf": 0.9255555555555556,
"calib/mu_c": 0.9182835820895522,
"calib/mu_w": 0.9338135593220339,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.39531746031746046,
"calib/std_conf": 0.05454800646053292,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2068.0,
"completions/max_terminated_length": 2068.0,
"completions/mean_length": 466.38671875,
"completions/mean_terminated_length": 470.0590515136719,
"completions/min_length": 0.0,
"completions/min_terminated_length": 267.0,
"epoch": 0.027733333333333332,
"grad_norm": 0.009599103592336178,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0171,
"num_tokens": 7041588.0,
"reward": 1.5898265838623047,
"reward_std": 0.6190015077590942,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5721874833106995,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6855560541152954,
"step": 26
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4757980833791125,
"calib/avg_num_step_conf": 7.49609375,
"calib/ece": 0.41597656250000004,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.62109375,
"calib/gap": -0.006178966001343045,
"calib/mean_conf": 0.9095703125000001,
"calib/mu_c": 0.9064566929133857,
"calib/mu_w": 0.9126356589147288,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.41472656250000006,
"calib/std_conf": 0.0550373713366867,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1052.0,
"completions/max_terminated_length": 1052.0,
"completions/mean_length": 463.4453125,
"completions/mean_terminated_length": 470.8016052246094,
"completions/min_length": 0.0,
"completions/min_terminated_length": 238.0,
"epoch": 0.0288,
"grad_norm": 0.012968444265425205,
"learning_rate": 4.805555555555556e-06,
"loss": -0.0082,
"num_tokens": 7265446.0,
"reward": 1.5642105340957642,
"reward_std": 0.6272684335708618,
"rewards/accuracy_reward_step": 0.49609375,
"rewards/final_brier_reward_step": 0.5723339915275574,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.715758204460144,
"step": 27
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4433365917236885,
"calib/avg_num_step_conf": 6.921875,
"calib/ece": 0.2807086614173228,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.5,
"calib/gap": -0.006353861192570864,
"calib/mean_conf": 0.8876377952755905,
"calib/mu_c": 0.8851612903225807,
"calib/mu_w": 0.8915151515151516,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2790551181102362,
"calib/std_conf": 0.07656973243944275,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1428.0,
"completions/max_terminated_length": 1428.0,
"completions/mean_length": 459.84765625,
"completions/mean_terminated_length": 467.1468505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 111.0,
"epoch": 0.029866666666666666,
"grad_norm": 0.012692811898887157,
"learning_rate": 4.777777777777778e-06,
"loss": -0.0264,
"num_tokens": 7490111.0,
"reward": 1.7726266384124756,
"reward_std": 0.607335090637207,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.6710312962532043,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7788500785827637,
"step": 28
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4182775178527456,
"calib/avg_num_step_conf": 7.19140625,
"calib/ece": 0.3746274509803921,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.4666666666666667,
"calib/gap": -0.01619859640482635,
"calib/mean_conf": 0.8848235294117647,
"calib/mu_c": 0.8769465648854962,
"calib/mu_w": 0.8931451612903225,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3728627450980392,
"calib/std_conf": 0.07264652694891428,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1312.0,
"completions/max_terminated_length": 1312.0,
"completions/mean_length": 498.39453125,
"completions/mean_terminated_length": 506.3055725097656,
"completions/min_length": 0.0,
"completions/min_terminated_length": 178.0,
"epoch": 0.030933333333333334,
"grad_norm": 0.00959760695695877,
"learning_rate": 4.75e-06,
"loss": -0.0374,
"num_tokens": 7724828.0,
"reward": 1.5990973711013794,
"reward_std": 0.5396340489387512,
"rewards/accuracy_reward_step": 0.51171875,
"rewards/final_brier_reward_step": 0.5891090035438538,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.760405421257019,
"step": 29
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4417037507946599,
"calib/avg_num_step_conf": 6.20703125,
"calib/ece": 0.3761354581673306,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.3426294820717131,
"calib/gap": -0.017837253655435648,
"calib/mean_conf": 0.8504780876494024,
"calib/mu_c": 0.8412396694214875,
"calib/mu_w": 0.8590769230769232,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3722709163346613,
"calib/std_conf": 0.09256294573013182,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1832.0,
"completions/max_terminated_length": 1832.0,
"completions/mean_length": 501.0,
"completions/mean_terminated_length": 510.9801025390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 27.0,
"epoch": 0.032,
"grad_norm": 0.010969402268528938,
"learning_rate": 4.722222222222222e-06,
"loss": -0.018,
"num_tokens": 7960068.0,
"reward": 1.525612711906433,
"reward_std": 0.6004476547241211,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.5841984152793884,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.7370023727416992,
"step": 30
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.4944075063692289,
"calib/avg_num_step_conf": 6.1640625,
"calib/ece": 0.33633858267716543,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.14566929133858267,
"calib/gap": 0.0018871559062948462,
"calib/mean_conf": 0.8090944881889763,
"calib/mu_c": 0.8100826446280993,
"calib/mu_w": 0.8081954887218045,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.33452755905511816,
"calib/std_conf": 0.08994512177016528,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3057.0,
"completions/max_terminated_length": 3057.0,
"completions/mean_length": 493.3203125,
"completions/mean_terminated_length": 499.16998291015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 235.0,
"epoch": 0.03306666666666667,
"grad_norm": 0.009395385161042213,
"learning_rate": 4.694444444444445e-06,
"loss": 0.0088,
"num_tokens": 8192270.0,
"reward": 1.5579791069030762,
"reward_std": 0.4944719076156616,
"rewards/accuracy_reward_step": 0.47265625,
"rewards/final_brier_reward_step": 0.6239476203918457,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7954689264297485,
"step": 31
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.4968307692307692,
"calib/avg_num_step_conf": 5.7734375,
"calib/ece": 0.24705882352941166,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.07450980392156863,
"calib/gap": -0.007018461538461529,
"calib/mean_conf": 0.7509019607843138,
"calib/mu_c": 0.7474615384615384,
"calib/mu_w": 0.7544799999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.24407843137254892,
"calib/std_conf": 0.10591695876023853,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2271.0,
"completions/max_terminated_length": 2271.0,
"completions/mean_length": 465.46875,
"completions/mean_terminated_length": 472.857177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 217.0,
"epoch": 0.034133333333333335,
"grad_norm": 0.01071823202073574,
"learning_rate": 4.666666666666667e-06,
"loss": -0.016,
"num_tokens": 8418134.0,
"reward": 1.6352819204330444,
"reward_std": 0.4783379137516022,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.6744414567947388,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8354364633560181,
"step": 32
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4899872854418309,
"calib/avg_num_step_conf": 5.52734375,
"calib/ece": 0.16980237154150202,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.05928853754940711,
"calib/gap": -0.007132867132867204,
"calib/mean_conf": 0.7214229249011856,
"calib/mu_c": 0.7183216783216783,
"calib/mu_w": 0.7254545454545455,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.16300395256917,
"calib/std_conf": 0.12191631528612948,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2500.0,
"completions/max_terminated_length": 2500.0,
"completions/mean_length": 514.33203125,
"completions/mean_terminated_length": 518.3818969726562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 174.0,
"epoch": 0.0352,
"grad_norm": 0.008350872434675694,
"learning_rate": 4.638888888888889e-06,
"loss": 0.0144,
"num_tokens": 8656675.0,
"reward": 1.7188265323638916,
"reward_std": 0.4865078330039978,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.6994839906692505,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8555097579956055,
"step": 33
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.47640522875816993,
"calib/avg_num_step_conf": 5.54296875,
"calib/ece": 0.14896825396825403,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.023809523809523808,
"calib/gap": -0.011184313725490336,
"calib/mean_conf": 0.6857936507936507,
"calib/mu_c": 0.6812666666666666,
"calib/mu_w": 0.6924509803921569,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.11976190476190483,
"calib/std_conf": 0.12821641659123492,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2431.0,
"completions/max_terminated_length": 2431.0,
"completions/mean_length": 497.8046875,
"completions/mean_terminated_length": 499.75689697265625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 218.0,
"epoch": 0.03626666666666667,
"grad_norm": 0.009573661722242832,
"learning_rate": 4.611111111111112e-06,
"loss": 0.0059,
"num_tokens": 8889225.0,
"reward": 1.7634197473526,
"reward_std": 0.5686565637588501,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.715394139289856,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.8617225289344788,
"step": 34
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.4415547208251098,
"calib/avg_num_step_conf": 5.40625,
"calib/ece": 0.18134920634920634,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.047619047619047616,
"calib/gap": -0.02662634494174576,
"calib/mean_conf": 0.7015079365079365,
"calib/mu_c": 0.689568345323741,
"calib/mu_w": 0.7161946902654868,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.16563492063492066,
"calib/std_conf": 0.12104746189924816,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2504.0,
"completions/max_terminated_length": 2504.0,
"completions/mean_length": 605.14453125,
"completions/mean_terminated_length": 609.909423828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 232.0,
"epoch": 0.037333333333333336,
"grad_norm": 0.008835054002702236,
"learning_rate": 4.583333333333333e-06,
"loss": 0.0053,
"num_tokens": 9153398.0,
"reward": 1.686740756034851,
"reward_std": 0.3833616077899933,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.688662052154541,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.8395507335662842,
"step": 35
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.501685855263158,
"calib/avg_num_step_conf": 4.85546875,
"calib/ece": 0.09594488188976376,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.015748031496062992,
"calib/gap": 0.003531249999999986,
"calib/mean_conf": 0.7131102362204724,
"calib/mu_c": 0.714,
"calib/mu_w": 0.71046875,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.030511811023622007,
"calib/std_conf": 0.10194646799259242,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2517.0,
"completions/max_terminated_length": 2517.0,
"completions/mean_length": 483.80859375,
"completions/mean_terminated_length": 487.61810302734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.0384,
"grad_norm": 0.016380857676267624,
"learning_rate": 4.555555555555556e-06,
"loss": 0.0072,
"num_tokens": 9379965.0,
"reward": 2.017597198486328,
"reward_std": 0.48127132654190063,
"rewards/accuracy_reward_step": 0.7421875,
"rewards/final_brier_reward_step": 0.7909355759620667,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8575782775878906,
"step": 36
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.4459050564819796,
"calib/avg_num_step_conf": 4.66015625,
"calib/ece": 0.1722672064777328,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.032388663967611336,
"calib/gap": -0.0178321678321679,
"calib/mean_conf": 0.7465991902834008,
"calib/mu_c": 0.739090909090909,
"calib/mu_w": 0.7569230769230769,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.16995951417004052,
"calib/std_conf": 0.09547544186514331,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2609.0,
"completions/max_terminated_length": 2609.0,
"completions/mean_length": 575.828125,
"completions/mean_terminated_length": 580.3621826171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 236.0,
"epoch": 0.039466666666666664,
"grad_norm": 0.010821904055774212,
"learning_rate": 4.527777777777778e-06,
"loss": 0.0247,
"num_tokens": 9634473.0,
"reward": 1.682852864265442,
"reward_std": 0.38647031784057617,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.6793968677520752,
"rewards/format_reward_step": 0.94921875,
"rewards/stepwise_brier_reward": 0.8020142912864685,
"step": 37
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5007050765511684,
"calib/avg_num_step_conf": 4.59375,
"calib/ece": 0.20439516129032248,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.04435483870967742,
"calib/gap": -0.001553854418479772,
"calib/mean_conf": 0.7931048387096774,
"calib/mu_c": 0.7924657534246574,
"calib/mu_w": 0.7940196078431372,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.20439516129032248,
"calib/std_conf": 0.07716523102934827,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2516.0,
"completions/max_terminated_length": 2516.0,
"completions/mean_length": 612.84375,
"completions/mean_terminated_length": 617.6693115234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 235.0,
"epoch": 0.04053333333333333,
"grad_norm": 0.009312131442129612,
"learning_rate": 4.5e-06,
"loss": 0.0488,
"num_tokens": 9898249.0,
"reward": 1.7124087810516357,
"reward_std": 0.5167993307113647,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.6861327886581421,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.8119394779205322,
"step": 38
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5022058823529412,
"calib/avg_num_step_conf": 4.58203125,
"calib/ece": 0.2915936254980079,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.09561752988047809,
"calib/gap": -0.0007378516624041431,
"calib/mean_conf": 0.8334262948207172,
"calib/mu_c": 0.8330882352941176,
"calib/mu_w": 0.8338260869565217,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2915936254980079,
"calib/std_conf": 0.06562155736230371,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2500.0,
"completions/max_terminated_length": 2500.0,
"completions/mean_length": 580.625,
"completions/mean_terminated_length": 580.625,
"completions/min_length": 197.0,
"completions/min_terminated_length": 197.0,
"epoch": 0.0416,
"grad_norm": 0.008780171163380146,
"learning_rate": 4.472222222222223e-06,
"loss": 0.0244,
"num_tokens": 10152977.0,
"reward": 1.6495329141616821,
"reward_std": 0.5215615034103394,
"rewards/accuracy_reward_step": 0.53125,
"rewards/final_brier_reward_step": 0.6453015804290771,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8122050166130066,
"step": 39
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5291847109772171,
"calib/avg_num_step_conf": 4.4375,
"calib/ece": 0.3016535433070866,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.14173228346456693,
"calib/gap": 0.00837256009539955,
"calib/mean_conf": 0.8567716535433071,
"calib/mu_c": 0.8604964539007092,
"calib/mu_w": 0.8521238938053096,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3016535433070866,
"calib/std_conf": 0.0547130220973908,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2471.0,
"completions/max_terminated_length": 2471.0,
"completions/mean_length": 577.18359375,
"completions/mean_terminated_length": 581.7283325195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 213.0,
"epoch": 0.042666666666666665,
"grad_norm": 0.009869641624391079,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0026,
"num_tokens": 10407496.0,
"reward": 1.6799644231796265,
"reward_std": 0.5000669360160828,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.6576230525970459,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7809846997261047,
"step": 40
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5991044776119404,
"calib/avg_num_step_conf": 4.63671875,
"calib/ece": 0.0840637450199204,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.3346613545816733,
"calib/gap": 0.019556218905472544,
"calib/mean_conf": 0.8848605577689241,
"calib/mu_c": 0.8887562189054725,
"calib/mu_w": 0.8692,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.0840637450199204,
"calib/std_conf": 0.05505774294189895,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2503.0,
"completions/max_terminated_length": 2503.0,
"completions/mean_length": 529.984375,
"completions/mean_terminated_length": 538.3968505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 198.0,
"epoch": 0.04373333333333333,
"grad_norm": 0.00941492710262537,
"learning_rate": 4.416666666666667e-06,
"loss": -0.0049,
"num_tokens": 10650420.0,
"reward": 2.0804450511932373,
"reward_std": 0.3655003011226654,
"rewards/accuracy_reward_step": 0.78515625,
"rewards/final_brier_reward_step": 0.8202797174453735,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.8296257257461548,
"step": 41
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.538619390432618,
"calib/avg_num_step_conf": 4.51953125,
"calib/ece": 0.251328125,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.4453125,
"calib/gap": 0.0070066608356322835,
"calib/mean_conf": 0.903671875,
"calib/mu_c": 0.906107784431138,
"calib/mu_w": 0.8991011235955058,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.251328125,
"calib/std_conf": 0.04970021211206621,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 913.0,
"completions/max_terminated_length": 913.0,
"completions/mean_length": 427.34765625,
"completions/mean_terminated_length": 434.1309814453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 250.0,
"epoch": 0.0448,
"grad_norm": 0.010592802427709103,
"learning_rate": 4.388888888888889e-06,
"loss": -0.0141,
"num_tokens": 10864189.0,
"reward": 1.8587779998779297,
"reward_std": 0.3515624403953552,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.7107508182525635,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8102985620498657,
"step": 42
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.6028612531969308,
"calib/avg_num_step_conf": 4.51171875,
"calib/ece": 0.18884920634920657,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.5674603174603174,
"calib/gap": 0.019990409207160997,
"calib/mean_conf": 0.9190079365079366,
"calib/mu_c": 0.9244021739130434,
"calib/mu_w": 0.9044117647058824,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.18884920634920657,
"calib/std_conf": 0.04412518196045866,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2503.0,
"completions/max_terminated_length": 2503.0,
"completions/mean_length": 521.09375,
"completions/mean_terminated_length": 527.2727661132812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 249.0,
"epoch": 0.04586666666666667,
"grad_norm": 0.010173147544264793,
"learning_rate": 4.361111111111112e-06,
"loss": -0.0125,
"num_tokens": 11102813.0,
"reward": 1.9611314535140991,
"reward_std": 0.4487866759300232,
"rewards/accuracy_reward_step": 0.71875,
"rewards/final_brier_reward_step": 0.7611573934555054,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8021184206008911,
"step": 43
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5419849758085052,
"calib/avg_num_step_conf": 4.44140625,
"calib/ece": 0.400199203187251,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.5657370517928287,
"calib/gap": 0.005902724726254083,
"calib/mean_conf": 0.9193227091633466,
"calib/mu_c": 0.9221212121212122,
"calib/mu_w": 0.9162184873949581,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3968127490039841,
"calib/std_conf": 0.05360267210135566,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2558.0,
"completions/max_terminated_length": 2558.0,
"completions/mean_length": 584.58984375,
"completions/mean_terminated_length": 586.8823852539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 254.0,
"epoch": 0.046933333333333334,
"grad_norm": 0.008310251869261265,
"learning_rate": 4.333333333333334e-06,
"loss": 0.009,
"num_tokens": 11358788.0,
"reward": 1.6001975536346436,
"reward_std": 0.4775276780128479,
"rewards/accuracy_reward_step": 0.515625,
"rewards/final_brier_reward_step": 0.5843167304992676,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7617859244346619,
"step": 44
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5925801238890386,
"calib/avg_num_step_conf": 4.453125,
"calib/ece": 0.30039682539682533,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.6190476190476191,
"calib/gap": 0.015844330729868017,
"calib/mean_conf": 0.9273809523809523,
"calib/mu_c": 0.9332911392405062,
"calib/mu_w": 0.9174468085106382,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30039682539682533,
"calib/std_conf": 0.04292256910735317,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2485.0,
"completions/max_terminated_length": 2485.0,
"completions/mean_length": 540.63671875,
"completions/mean_terminated_length": 544.8936767578125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 214.0,
"epoch": 0.048,
"grad_norm": 0.01107387151569128,
"learning_rate": 4.305555555555556e-06,
"loss": 0.002,
"num_tokens": 11602239.0,
"reward": 1.7854273319244385,
"reward_std": 0.4884273111820221,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.6708077788352966,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7990267872810364,
"step": 45
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5024832301341589,
"calib/avg_num_step_conf": 4.3671875,
"calib/ece": 0.3325196850393701,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6653543307086615,
"calib/gap": -0.002844427244581782,
"calib/mean_conf": 0.9309448818897638,
"calib/mu_c": 0.9298026315789476,
"calib/mu_w": 0.9326470588235294,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3325196850393701,
"calib/std_conf": 0.04164781229645246,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2456.0,
"completions/max_terminated_length": 2456.0,
"completions/mean_length": 589.28515625,
"completions/mean_terminated_length": 593.9251708984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 231.0,
"epoch": 0.04906666666666667,
"grad_norm": 0.020898550748825073,
"learning_rate": 4.277777777777778e-06,
"loss": 0.0114,
"num_tokens": 11857864.0,
"reward": 1.743819236755371,
"reward_std": 0.47148585319519043,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.6370730400085449,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7991417646408081,
"step": 46
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5463698241633579,
"calib/avg_num_step_conf": 4.27734375,
"calib/ece": 0.2802400000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.7,
"calib/gap": 0.007562393647192245,
"calib/mean_conf": 0.93624,
"calib/mu_c": 0.9388414634146341,
"calib/mu_w": 0.9312790697674419,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2802400000000001,
"calib/std_conf": 0.03326653573788528,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00390625,
"completions/max_length": 2476.0,
"completions/max_terminated_length": 2476.0,
"completions/mean_length": 638.3046875,
"completions/mean_terminated_length": 640.807861328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 293.0,
"epoch": 0.050133333333333335,
"grad_norm": 0.009583506733179092,
"learning_rate": 4.25e-06,
"loss": 0.0005,
"num_tokens": 12127246.0,
"reward": 1.8233468532562256,
"reward_std": 0.34553125500679016,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.6817461252212524,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8147666454315186,
"step": 47
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5263819927176432,
"calib/avg_num_step_conf": 4.19921875,
"calib/ece": 0.30960629921259836,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.6732283464566929,
"calib/gap": 0.0027090367428002082,
"calib/mean_conf": 0.9355905511811025,
"calib/mu_c": 0.9366037735849058,
"calib/mu_w": 0.9338947368421056,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30960629921259836,
"calib/std_conf": 0.03437068755041604,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2524.0,
"completions/max_terminated_length": 2524.0,
"completions/mean_length": 560.03515625,
"completions/mean_terminated_length": 566.6759033203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 270.0,
"epoch": 0.0512,
"grad_norm": 0.01360836811363697,
"learning_rate": 4.222222222222223e-06,
"loss": -0.0274,
"num_tokens": 12374303.0,
"reward": 1.7975071668624878,
"reward_std": 0.48006561398506165,
"rewards/accuracy_reward_step": 0.62109375,
"rewards/final_brier_reward_step": 0.6648679971694946,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8142232894897461,
"step": 48
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5263713080168777,
"calib/avg_num_step_conf": 4.1015625,
"calib/ece": 0.2469169960474309,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6600790513833992,
"calib/gap": 0.003284591881274168,
"calib/mean_conf": 0.9346640316205533,
"calib/mu_c": 0.9356896551724138,
"calib/mu_w": 0.9324050632911396,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2469169960474309,
"calib/std_conf": 0.0297616647798036,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2460.0,
"completions/max_terminated_length": 2460.0,
"completions/mean_length": 630.34765625,
"completions/mean_terminated_length": 640.3532104492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 243.0,
"epoch": 0.05226666666666667,
"grad_norm": 0.010739161632955074,
"learning_rate": 4.194444444444445e-06,
"loss": -0.007,
"num_tokens": 12640208.0,
"reward": 1.897510051727295,
"reward_std": 0.49483951926231384,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/final_brier_reward_step": 0.716312050819397,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8190404176712036,
"step": 49
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.603075091061922,
"calib/avg_num_step_conf": 3.83203125,
"calib/ece": 0.26188235294117657,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.6705882352941176,
"calib/gap": 0.013049173437937878,
"calib/mean_conf": 0.9363921568627452,
"calib/mu_c": 0.9406395348837211,
"calib/mu_w": 0.9275903614457832,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.26188235294117657,
"calib/std_conf": 0.03151599558479603,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2461.0,
"completions/max_terminated_length": 2461.0,
"completions/mean_length": 616.73046875,
"completions/mean_terminated_length": 624.0435180664062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 186.0,
"epoch": 0.05333333333333334,
"grad_norm": 0.010514969006180763,
"learning_rate": 4.166666666666667e-06,
"loss": -0.003,
"num_tokens": 12903451.0,
"reward": 1.8781836032867432,
"reward_std": 0.39321768283843994,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.7060120701789856,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7989096641540527,
"step": 50
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.6135584376863447,
"calib/avg_num_step_conf": 3.72265625,
"calib/ece": 0.24864000000000014,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.68,
"calib/gap": 0.017291293977340305,
"calib/mean_conf": 0.9366400000000001,
"calib/mu_c": 0.9420348837209304,
"calib/mu_w": 0.92474358974359,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.24864000000000014,
"calib/std_conf": 0.03652821375320724,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2631.0,
"completions/max_terminated_length": 2631.0,
"completions/mean_length": 725.875,
"completions/mean_terminated_length": 725.875,
"completions/min_length": 272.0,
"completions/min_terminated_length": 272.0,
"epoch": 0.0544,
"grad_norm": 0.009126733988523483,
"learning_rate": 4.138888888888889e-06,
"loss": 0.0454,
"num_tokens": 13198571.0,
"reward": 1.874480128288269,
"reward_std": 0.47806280851364136,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.712510883808136,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.8010349273681641,
"step": 51
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4878570717627152,
"calib/avg_num_step_conf": 4.01953125,
"calib/ece": 0.15903225806451618,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 0.7580645161290323,
"calib/gap": 0.0030536478550813317,
"calib/mean_conf": 0.9469354838709677,
"calib/mu_c": 0.9475634517766498,
"calib/mu_w": 0.9445098039215685,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.15580645161290327,
"calib/std_conf": 0.04660536829377197,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2477.0,
"completions/max_terminated_length": 2477.0,
"completions/mean_length": 719.6015625,
"completions/mean_terminated_length": 733.936279296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 107.0,
"epoch": 0.055466666666666664,
"grad_norm": 0.010852030478417873,
"learning_rate": 4.111111111111111e-06,
"loss": 0.0161,
"num_tokens": 13490741.0,
"reward": 2.035630702972412,
"reward_std": 0.41557496786117554,
"rewards/accuracy_reward_step": 0.76953125,
"rewards/final_brier_reward_step": 0.7765073776245117,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.8347657918930054,
"step": 52
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.554396650171298,
"calib/avg_num_step_conf": 4.16796875,
"calib/ece": 0.2316796875,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.80078125,
"calib/gap": 0.004051008755233898,
"calib/mean_conf": 0.9543359375,
"calib/mu_c": 0.9554594594594594,
"calib/mu_w": 0.9514084507042255,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2316796875,
"calib/std_conf": 0.03062417788277904,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2055.0,
"completions/max_terminated_length": 2055.0,
"completions/mean_length": 744.93359375,
"completions/mean_terminated_length": 756.7579956054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 302.0,
"epoch": 0.05653333333333333,
"grad_norm": 0.010888488031923771,
"learning_rate": 4.083333333333334e-06,
"loss": 0.0073,
"num_tokens": 13787268.0,
"reward": 1.9786882400512695,
"reward_std": 0.43185755610466003,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.7465863227844238,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8322290182113647,
"step": 53
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.6321407274895647,
"calib/avg_num_step_conf": 4.45703125,
"calib/ece": 0.12000000000000009,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.905511811023622,
"calib/gap": 0.02065354800238517,
"calib/mean_conf": 0.966456692913386,
"calib/mu_c": 0.9696279069767443,
"calib/mu_w": 0.9489743589743591,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.12000000000000009,
"calib/std_conf": 0.03484561132242132,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2464.0,
"completions/max_terminated_length": 2464.0,
"completions/mean_length": 736.640625,
"completions/mean_terminated_length": 748.3333740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 291.0,
"epoch": 0.0576,
"grad_norm": 0.010013816878199577,
"learning_rate": 4.055555555555556e-06,
"loss": -0.0057,
"num_tokens": 14082080.0,
"reward": 2.181171417236328,
"reward_std": 0.31019675731658936,
"rewards/accuracy_reward_step": 0.83984375,
"rewards/final_brier_reward_step": 0.8491636514663696,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.859897255897522,
"step": 54
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.6077430972388956,
"calib/avg_num_step_conf": 4.53125,
"calib/ece": 0.37301204819277134,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.9116465863453815,
"calib/gap": 0.03377951180472172,
"calib/mean_conf": 0.9633734939759038,
"calib/mu_c": 0.9772108843537415,
"calib/mu_w": 0.9434313725490198,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.37301204819277134,
"calib/std_conf": 0.08802076847524597,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2492.0,
"completions/max_terminated_length": 2492.0,
"completions/mean_length": 802.35546875,
"completions/mean_terminated_length": 811.8695678710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 324.0,
"epoch": 0.058666666666666666,
"grad_norm": 0.009584730491042137,
"learning_rate": 4.027777777777779e-06,
"loss": 0.0133,
"num_tokens": 14395307.0,
"reward": 1.6883587837219238,
"reward_std": 0.4356482923030853,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/final_brier_reward_step": 0.6104562282562256,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.7523536682128906,
"step": 55
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.6073613512435165,
"calib/avg_num_step_conf": 4.58203125,
"calib/ece": 0.39220883534136564,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.9678714859437751,
"calib/gap": 0.006641175688256062,
"calib/mean_conf": 0.97855421686747,
"calib/mu_c": 0.9813013698630135,
"calib/mu_w": 0.9746601941747575,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.39220883534136564,
"calib/std_conf": 0.018310082160494335,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2493.0,
"completions/max_terminated_length": 2493.0,
"completions/mean_length": 820.5625,
"completions/mean_terminated_length": 833.5873413085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 439.0,
"epoch": 0.05973333333333333,
"grad_norm": 0.010322212241590023,
"learning_rate": 4.000000000000001e-06,
"loss": -0.0006,
"num_tokens": 14712211.0,
"reward": 1.6709468364715576,
"reward_std": 0.3402813971042633,
"rewards/accuracy_reward_step": 0.5703125,
"rewards/final_brier_reward_step": 0.5817476511001587,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.7582898139953613,
"step": 56
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5652153902798233,
"calib/avg_num_step_conf": 4.61328125,
"calib/ece": 0.20820000000000002,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.96875,
"calib/frac_conf_gt_0.9": 0.988,
"calib/gap": 0.006562960235640736,
"calib/mean_conf": 0.9842000000000001,
"calib/mu_c": 0.9856701030927837,
"calib/mu_w": 0.979107142857143,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.20820000000000002,
"calib/std_conf": 0.016624078921853087,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2762.0,
"completions/max_terminated_length": 2762.0,
"completions/mean_length": 848.28515625,
"completions/mean_terminated_length": 854.9645385742188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 343.0,
"epoch": 0.0608,
"grad_norm": 0.01343838032335043,
"learning_rate": 3.972222222222223e-06,
"loss": -0.0141,
"num_tokens": 15036164.0,
"reward": 2.0146446228027344,
"reward_std": 0.42082732915878296,
"rewards/accuracy_reward_step": 0.7578125,
"rewards/final_brier_reward_step": 0.7586382627487183,
"rewards/format_reward_step": 0.96875,
"rewards/stepwise_brier_reward": 0.8155644536018372,
"step": 57
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.4425564116675839,
"calib/avg_num_step_conf": 4.65625,
"calib/ece": 0.35218000000000005,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.984,
"calib/gap": -0.003189323059988869,
"calib/mean_conf": 0.98418,
"calib/mu_c": 0.983006329113924,
"calib/mu_w": 0.9861956521739129,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.35218000000000005,
"calib/std_conf": 0.014471613593514721,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2454.0,
"completions/max_terminated_length": 2454.0,
"completions/mean_length": 875.03125,
"completions/mean_terminated_length": 885.4071655273438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 379.0,
"epoch": 0.06186666666666667,
"grad_norm": 0.00953914038836956,
"learning_rate": 3.944444444444445e-06,
"loss": -0.0265,
"num_tokens": 15366492.0,
"reward": 1.751786708831787,
"reward_std": 0.6095901131629944,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.622763991355896,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.7359455823898315,
"step": 58
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5800150519978106,
"calib/avg_num_step_conf": 4.609375,
"calib/ece": 0.3273725490196079,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.0024261083743843948,
"calib/mean_conf": 0.9861960784313726,
"calib/mu_c": 0.9870238095238096,
"calib/mu_w": 0.9845977011494252,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3273725490196079,
"calib/std_conf": 0.00929055942477207,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1890.0,
"completions/max_terminated_length": 1890.0,
"completions/mean_length": 769.13671875,
"completions/mean_terminated_length": 781.3452758789062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 339.0,
"epoch": 0.06293333333333333,
"grad_norm": 0.009146219119429588,
"learning_rate": 3.916666666666667e-06,
"loss": -0.0049,
"num_tokens": 15669639.0,
"reward": 1.8364317417144775,
"reward_std": 0.4589112102985382,
"rewards/accuracy_reward_step": 0.65625,
"rewards/final_brier_reward_step": 0.6660621166229248,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7577903270721436,
"step": 59
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5622893258426966,
"calib/avg_num_step_conf": 4.62109375,
"calib/ece": 0.34421686746987956,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0032183988764044047,
"calib/mean_conf": 0.9867871485943776,
"calib/mu_c": 0.9879374999999999,
"calib/mu_w": 0.9847191011235955,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.34421686746987956,
"calib/std_conf": 0.007973278383118638,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2733.0,
"completions/max_terminated_length": 2733.0,
"completions/mean_length": 799.46875,
"completions/mean_terminated_length": 815.3944702148438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 388.0,
"epoch": 0.064,
"grad_norm": 0.009623522870242596,
"learning_rate": 3.88888888888889e-06,
"loss": -0.0195,
"num_tokens": 15983159.0,
"reward": 1.7713819742202759,
"reward_std": 0.5381677150726318,
"rewards/accuracy_reward_step": 0.625,
"rewards/final_brier_reward_step": 0.6353933811187744,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.754822313785553,
"step": 60
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5385592241976449,
"calib/avg_num_step_conf": 4.89453125,
"calib/ece": 0.26834645669291335,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0005110444085276722,
"calib/mean_conf": 0.9888188976377953,
"calib/mu_c": 0.9889617486338795,
"calib/mu_w": 0.9884507042253519,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26834645669291335,
"calib/std_conf": 0.005115080872098183,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2905.0,
"completions/max_terminated_length": 2905.0,
"completions/mean_length": 753.19140625,
"completions/mean_terminated_length": 759.1220703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 361.0,
"epoch": 0.06506666666666666,
"grad_norm": 0.009747637435793877,
"learning_rate": 3.861111111111112e-06,
"loss": 0.0259,
"num_tokens": 16280040.0,
"reward": 1.9452223777770996,
"reward_std": 0.3222607970237732,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/final_brier_reward_step": 0.7210999727249146,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7863516211509705,
"step": 61
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5532234432234432,
"calib/avg_num_step_conf": 4.796875,
"calib/ece": 0.296403162055336,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0020761904761904537,
"calib/mean_conf": 0.988102766798419,
"calib/mu_c": 0.988742857142857,
"calib/mu_w": 0.9866666666666666,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.296403162055336,
"calib/std_conf": 0.0063772471241418886,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1681.0,
"completions/max_terminated_length": 1681.0,
"completions/mean_length": 783.80078125,
"completions/mean_terminated_length": 796.2421264648438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 401.0,
"epoch": 0.06613333333333334,
"grad_norm": 0.01057348120957613,
"learning_rate": 3.833333333333334e-06,
"loss": -0.023,
"num_tokens": 16587773.0,
"reward": 1.890638828277588,
"reward_std": 0.4780218303203583,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/final_brier_reward_step": 0.691383957862854,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8008589744567871,
"step": 62
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5422031955774287,
"calib/avg_num_step_conf": 4.87109375,
"calib/ece": 0.3461811023622048,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0008582215330680265,
"calib/mean_conf": 0.9879133858267717,
"calib/mu_c": 0.9882208588957054,
"calib/mu_w": 0.9873626373626374,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3461811023622048,
"calib/std_conf": 0.005884081852323143,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 2563.0,
"completions/max_terminated_length": 2563.0,
"completions/mean_length": 811.48046875,
"completions/mean_terminated_length": 817.8700561523438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 330.0,
"epoch": 0.0672,
"grad_norm": 0.01171042863279581,
"learning_rate": 3.8055555555555556e-06,
"loss": -0.0046,
"num_tokens": 16904152.0,
"reward": 1.7990059852600098,
"reward_std": 0.3330551087856293,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/final_brier_reward_step": 0.641627311706543,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7575217485427856,
"step": 63
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5147540983606558,
"calib/avg_num_step_conf": 4.93359375,
"calib/ece": 0.2632411067193676,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9881422924901185,
"calib/gap": -0.0015940671350509472,
"calib/mean_conf": 0.9865612648221345,
"calib/mu_c": 0.9861202185792348,
"calib/mu_w": 0.9877142857142858,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2632411067193676,
"calib/std_conf": 0.011712683641421357,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2517.0,
"completions/max_terminated_length": 2517.0,
"completions/mean_length": 777.28125,
"completions/mean_terminated_length": 789.6190795898438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 368.0,
"epoch": 0.06826666666666667,
"grad_norm": 0.009954201057553291,
"learning_rate": 3.777777777777778e-06,
"loss": 0.0079,
"num_tokens": 17206912.0,
"reward": 1.949721336364746,
"reward_std": 0.5117054581642151,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/final_brier_reward_step": 0.72124844789505,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.8120118379592896,
"step": 64
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5699662030417262,
"calib/avg_num_step_conf": 4.90625,
"calib/ece": 0.37192156862745107,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.001418172364486936,
"calib/mean_conf": 0.987607843137255,
"calib/mu_c": 0.9881528662420381,
"calib/mu_w": 0.9867346938775512,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.37192156862745107,
"calib/std_conf": 0.006084626823662836,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2177.0,
"completions/max_terminated_length": 2177.0,
"completions/mean_length": 724.48046875,
"completions/mean_terminated_length": 735.980224609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 342.0,
"epoch": 0.06933333333333333,
"grad_norm": 0.015558356419205666,
"learning_rate": 3.7500000000000005e-06,
"loss": -0.0087,
"num_tokens": 17497403.0,
"reward": 1.7543145418167114,
"reward_std": 0.38484832644462585,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.6193417906761169,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7338536977767944,
"step": 65
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.4737023139462164,
"calib/avg_num_step_conf": 4.93359375,
"calib/ece": 0.473197628458498,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.001172607879925125,
"calib/mean_conf": 0.9870316205533597,
"calib/mu_c": 0.9864615384615385,
"calib/mu_w": 0.9876341463414636,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.473197628458498,
"calib/std_conf": 0.008257646363266199,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2491.0,
"completions/max_terminated_length": 2491.0,
"completions/mean_length": 826.0,
"completions/mean_terminated_length": 835.7944946289062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 355.0,
"epoch": 0.0704,
"grad_norm": 0.009562370367348194,
"learning_rate": 3.7222222222222225e-06,
"loss": -0.0245,
"num_tokens": 17815211.0,
"reward": 1.5644525289535522,
"reward_std": 0.4363713562488556,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.5194617509841919,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7149111032485962,
"step": 66
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.6139020225899658,
"calib/avg_num_step_conf": 4.94140625,
"calib/ece": 0.35351562500000006,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.99609375,
"calib/gap": 0.003777252429734701,
"calib/mean_conf": 0.9851562500000002,
"calib/mu_c": 0.9865432098765432,
"calib/mu_w": 0.9827659574468085,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35292968750000003,
"calib/std_conf": 0.012898908323478394,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1802.0,
"completions/max_terminated_length": 1802.0,
"completions/mean_length": 775.19140625,
"completions/mean_terminated_length": 787.49609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 432.0,
"epoch": 0.07146666666666666,
"grad_norm": 0.011739449575543404,
"learning_rate": 3.694444444444445e-06,
"loss": 0.0093,
"num_tokens": 18118668.0,
"reward": 1.8034915924072266,
"reward_std": 0.41813188791275024,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.6447757482528687,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7801278829574585,
"step": 67
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5155316606929511,
"calib/avg_num_step_conf": 4.88671875,
"calib/ece": 0.3502745098039217,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.0011489446435684059,
"calib/mean_conf": 0.9855686274509805,
"calib/mu_c": 0.9859876543209878,
"calib/mu_w": 0.9848387096774194,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3502745098039217,
"calib/std_conf": 0.01142624241661134,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2532.0,
"completions/max_terminated_length": 2532.0,
"completions/mean_length": 761.01953125,
"completions/mean_terminated_length": 773.0992431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 328.0,
"epoch": 0.07253333333333334,
"grad_norm": 0.011653666384518147,
"learning_rate": 3.6666666666666666e-06,
"loss": -0.034,
"num_tokens": 18417577.0,
"reward": 1.7976186275482178,
"reward_std": 0.37335824966430664,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.6434906721115112,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7579212188720703,
"step": 68
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.6188670685073563,
"calib/avg_num_step_conf": 4.90234375,
"calib/ece": 0.42772,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.98,
"calib/gap": 0.004584224512281909,
"calib/mean_conf": 0.98372,
"calib/mu_c": 0.9857553956834533,
"calib/mu_w": 0.9811711711711714,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.42772,
"calib/std_conf": 0.01513147712551554,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2520.0,
"completions/max_terminated_length": 2520.0,
"completions/mean_length": 743.21484375,
"completions/mean_terminated_length": 758.0199584960938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 383.0,
"epoch": 0.0736,
"grad_norm": 0.013122929260134697,
"learning_rate": 3.638888888888889e-06,
"loss": 0.0125,
"num_tokens": 18712336.0,
"reward": 1.6212658882141113,
"reward_std": 0.3981201648712158,
"rewards/accuracy_reward_step": 0.54296875,
"rewards/final_brier_reward_step": 0.5588144659996033,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.7153115272521973,
"step": 69
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.6050340136054422,
"calib/avg_num_step_conf": 4.765625,
"calib/ece": 0.3887854251012147,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.004750340136054465,
"calib/mean_conf": 0.983927125506073,
"calib/mu_c": 0.9858503401360545,
"calib/mu_w": 0.9811000000000001,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3887854251012147,
"calib/std_conf": 0.012121297925437025,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2420.0,
"completions/max_terminated_length": 2420.0,
"completions/mean_length": 834.828125,
"completions/mean_terminated_length": 854.864013671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 366.0,
"epoch": 0.07466666666666667,
"grad_norm": 0.012035924009978771,
"learning_rate": 3.6111111111111115e-06,
"loss": -0.0489,
"num_tokens": 19033044.0,
"reward": 1.669405221939087,
"reward_std": 0.2907199263572693,
"rewards/accuracy_reward_step": 0.57421875,
"rewards/final_brier_reward_step": 0.584688663482666,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.7257446050643921,
"step": 70
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5980133727658481,
"calib/avg_num_step_conf": 4.93359375,
"calib/ece": 0.37721568627450996,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9803921568627451,
"calib/gap": 0.0064735759290215356,
"calib/mean_conf": 0.9811372549019609,
"calib/mu_c": 0.9837012987012987,
"calib/mu_w": 0.9772277227722772,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.37721568627450996,
"calib/std_conf": 0.018124860629990892,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1754.0,
"completions/max_terminated_length": 1754.0,
"completions/mean_length": 778.9921875,
"completions/mean_terminated_length": 791.357177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 370.0,
"epoch": 0.07573333333333333,
"grad_norm": 0.012741051614284515,
"learning_rate": 3.5833333333333335e-06,
"loss": 0.0015,
"num_tokens": 19336874.0,
"reward": 1.7401325702667236,
"reward_std": 0.6308400630950928,
"rewards/accuracy_reward_step": 0.6015625,
"rewards/final_brier_reward_step": 0.6186171770095825,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7559757232666016,
"step": 71
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.6513831121674258,
"calib/avg_num_step_conf": 4.94140625,
"calib/ece": 0.3752777777777778,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9841269841269841,
"calib/gap": 0.010308972073678091,
"calib/mean_conf": 0.9824206349206349,
"calib/mu_c": 0.9864705882352942,
"calib/mu_w": 0.9761616161616161,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3752777777777778,
"calib/std_conf": 0.015612293271143017,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2219.0,
"completions/max_terminated_length": 2219.0,
"completions/mean_length": 708.7109375,
"completions/mean_terminated_length": 722.8287353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 390.0,
"epoch": 0.0768,
"grad_norm": 0.011979524977505207,
"learning_rate": 3.555555555555556e-06,
"loss": -0.0052,
"num_tokens": 19622712.0,
"reward": 1.7353651523590088,
"reward_std": 0.37993955612182617,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.615549623966217,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7712236642837524,
"step": 72
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.569731738849386,
"calib/avg_num_step_conf": 4.9375,
"calib/ece": 0.2537600000000001,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 0.984,
"calib/gap": 0.005447640594699266,
"calib/mean_conf": 0.9817600000000001,
"calib/mu_c": 0.9832417582417582,
"calib/mu_w": 0.9777941176470589,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2537600000000001,
"calib/std_conf": 0.016926381775205245,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2510.0,
"completions/max_terminated_length": 2510.0,
"completions/mean_length": 709.1484375,
"completions/mean_terminated_length": 723.27490234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 363.0,
"epoch": 0.07786666666666667,
"grad_norm": 0.015338779427111149,
"learning_rate": 3.5277777777777784e-06,
"loss": 0.0176,
"num_tokens": 19911286.0,
"reward": 1.9338476657867432,
"reward_std": 0.49027276039123535,
"rewards/accuracy_reward_step": 0.7109375,
"rewards/final_brier_reward_step": 0.7219749689102173,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.8024784326553345,
"step": 73
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.543591922096595,
"calib/avg_num_step_conf": 4.94140625,
"calib/ece": 0.4088400000000002,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0035925756486504534,
"calib/mean_conf": 0.9808400000000002,
"calib/mu_c": 0.9823776223776225,
"calib/mu_w": 0.978785046728972,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4088400000000002,
"calib/std_conf": 0.015533653787824694,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2793.0,
"completions/max_terminated_length": 2793.0,
"completions/mean_length": 714.86328125,
"completions/mean_terminated_length": 729.1036376953125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 376.0,
"epoch": 0.07893333333333333,
"grad_norm": 0.011618921533226967,
"learning_rate": 3.5e-06,
"loss": -0.019,
"num_tokens": 20198219.0,
"reward": 1.651144027709961,
"reward_std": 0.4587170481681824,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.57573401927948,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.7241549491882324,
"step": 74
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.6485243055555555,
"calib/avg_num_step_conf": 5.046875,
"calib/ece": 0.21861111111111128,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9642857142857143,
"calib/gap": 0.009864583333333177,
"calib/mean_conf": 0.9805158730158731,
"calib/mu_c": 0.9828645833333334,
"calib/mu_w": 0.9730000000000002,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.21861111111111128,
"calib/std_conf": 0.030422483886513023,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2484.0,
"completions/max_terminated_length": 2484.0,
"completions/mean_length": 698.39453125,
"completions/mean_terminated_length": 709.480224609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 402.0,
"epoch": 0.08,
"grad_norm": 0.011897450312972069,
"learning_rate": 3.4722222222222224e-06,
"loss": -0.017,
"num_tokens": 20481760.0,
"reward": 2.0114951133728027,
"reward_std": 0.47728487849235535,
"rewards/accuracy_reward_step": 0.75,
"rewards/final_brier_reward_step": 0.7613714933395386,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.8158584833145142,
"step": 75
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5058423913043478,
"calib/avg_num_step_conf": 4.96875,
"calib/ece": 0.35123015873015884,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.996031746031746,
"calib/gap": -0.000584239130434816,
"calib/mean_conf": 0.9861507936507937,
"calib/mu_c": 0.9859375,
"calib/mu_w": 0.9865217391304348,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35123015873015884,
"calib/std_conf": 0.0132985065758489,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2503.0,
"completions/max_terminated_length": 2503.0,
"completions/mean_length": 691.328125,
"completions/mean_terminated_length": 702.3016357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 343.0,
"epoch": 0.08106666666666666,
"grad_norm": 0.013411462306976318,
"learning_rate": 3.444444444444445e-06,
"loss": -0.0219,
"num_tokens": 20761796.0,
"reward": 1.770603060722351,
"reward_std": 0.3884144425392151,
"rewards/accuracy_reward_step": 0.625,
"rewards/final_brier_reward_step": 0.6304218769073486,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7410531044006348,
"step": 76
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5758309248554914,
"calib/avg_num_step_conf": 4.9609375,
"calib/ece": 0.30312252964426867,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9960474308300395,
"calib/gap": 0.004814306358381537,
"calib/mean_conf": 0.9869169960474308,
"calib/mu_c": 0.9884393063583815,
"calib/mu_w": 0.983625,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30312252964426867,
"calib/std_conf": 0.012852962457764204,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1565.0,
"completions/max_terminated_length": 1565.0,
"completions/mean_length": 657.34375,
"completions/mean_terminated_length": 670.4382934570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 332.0,
"epoch": 0.08213333333333334,
"grad_norm": 0.011887096799910069,
"learning_rate": 3.416666666666667e-06,
"loss": -0.0116,
"num_tokens": 21034740.0,
"reward": 1.8679624795913696,
"reward_std": 0.5159233808517456,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/final_brier_reward_step": 0.6855285167694092,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7706961631774902,
"step": 77
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5792716891797799,
"calib/avg_num_step_conf": 4.859375,
"calib/ece": 0.3280592885375495,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9881422924901185,
"calib/gap": 0.008647611753237694,
"calib/mean_conf": 0.9881383399209487,
"calib/mu_c": 0.9910778443113772,
"calib/mu_w": 0.9824302325581395,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3280592885375495,
"calib/std_conf": 0.021115544926909733,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1318.0,
"completions/max_terminated_length": 1318.0,
"completions/mean_length": 689.78125,
"completions/mean_terminated_length": 700.730224609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 358.0,
"epoch": 0.0832,
"grad_norm": 0.013817236758768559,
"learning_rate": 3.3888888888888893e-06,
"loss": -0.0103,
"num_tokens": 21319348.0,
"reward": 1.8158355951309204,
"reward_std": 0.4627304673194885,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.6557570099830627,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7325854301452637,
"step": 78
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.49284195605953224,
"calib/avg_num_step_conf": 4.5234375,
"calib/ece": 0.3271314741035858,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 0.9920318725099602,
"calib/gap": -0.0022891566265060836,
"calib/mean_conf": 0.9884860557768925,
"calib/mu_c": 0.987710843373494,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3271314741035858,
"calib/std_conf": 0.015072824705491235,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2502.0,
"completions/max_terminated_length": 2502.0,
"completions/mean_length": 671.00390625,
"completions/mean_terminated_length": 681.65478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 316.0,
"epoch": 0.08426666666666667,
"grad_norm": 0.04336387291550636,
"learning_rate": 3.3611111111111117e-06,
"loss": -0.0013,
"num_tokens": 21597501.0,
"reward": 1.810776948928833,
"reward_std": 0.4501277208328247,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/final_brier_reward_step": 0.6545706987380981,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.7447869777679443,
"step": 79
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5829744473155326,
"calib/avg_num_step_conf": 4.16796875,
"calib/ece": 0.31088932806324104,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 0.9960474308300395,
"calib/gap": 0.0086118288831466,
"calib/mean_conf": 0.9907312252964426,
"calib/mu_c": 0.9934883720930232,
"calib/mu_w": 0.9848765432098766,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.31088932806324104,
"calib/std_conf": 0.03193927806759079,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1133.0,
"completions/max_terminated_length": 1133.0,
"completions/mean_length": 560.9375,
"completions/mean_terminated_length": 569.84130859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 194.0,
"epoch": 0.08533333333333333,
"grad_norm": 0.01943901553750038,
"learning_rate": 3.3333333333333333e-06,
"loss": -0.0237,
"num_tokens": 21843261.0,
"reward": 1.8514273166656494,
"reward_std": 0.4234926700592041,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.6763685941696167,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7371532320976257,
"step": 80
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.536545240893067,
"calib/avg_num_step_conf": 3.375,
"calib/ece": 0.26109803149606314,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 0.9960629921259843,
"calib/gap": 0.002820955738347286,
"calib/mean_conf": 0.9894444881889765,
"calib/mu_c": 0.990210810810811,
"calib/mu_w": 0.9873898550724637,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26109803149606314,
"calib/std_conf": 0.013146753032183678,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1464.0,
"completions/max_terminated_length": 1464.0,
"completions/mean_length": 547.81640625,
"completions/mean_terminated_length": 556.511962890625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 223.0,
"epoch": 0.0864,
"grad_norm": 0.051593534648418427,
"learning_rate": 3.3055555555555558e-06,
"loss": -0.018,
"num_tokens": 22089750.0,
"reward": 1.9547233581542969,
"reward_std": 0.4877452552318573,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.729171872138977,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7694095373153687,
"step": 81
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5687758945386063,
"calib/avg_num_step_conf": 3.703125,
"calib/ece": 0.28967063492063494,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.97265625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0036992090395480126,
"calib/mean_conf": 0.9920515873015873,
"calib/mu_c": 0.9931525423728814,
"calib/mu_w": 0.9894533333333334,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.28967063492063494,
"calib/std_conf": 0.009179067232472568,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1269.0,
"completions/max_terminated_length": 1269.0,
"completions/mean_length": 525.2109375,
"completions/mean_terminated_length": 535.67333984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 126.0,
"epoch": 0.08746666666666666,
"grad_norm": 0.014503278769552708,
"learning_rate": 3.277777777777778e-06,
"loss": -0.0238,
"num_tokens": 22329756.0,
"reward": 1.881931185722351,
"reward_std": 0.35736411809921265,
"rewards/accuracy_reward_step": 0.69140625,
"rewards/final_brier_reward_step": 0.6894752979278564,
"rewards/format_reward_step": 0.97265625,
"rewards/stepwise_brier_reward": 0.7444990873336792,
"step": 82
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5276703167005443,
"calib/avg_num_step_conf": 4.2734375,
"calib/ece": 0.398373015873016,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0012477870303586602,
"calib/mean_conf": 0.9975793650793652,
"calib/mu_c": 0.9980794701986754,
"calib/mu_w": 0.9968316831683167,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.398373015873016,
"calib/std_conf": 0.005099180099988029,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 3022.0,
"completions/max_terminated_length": 3022.0,
"completions/mean_length": 606.671875,
"completions/mean_terminated_length": 621.2320556640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 294.0,
"epoch": 0.08853333333333334,
"grad_norm": 0.014670017175376415,
"learning_rate": 3.2500000000000002e-06,
"loss": -0.0316,
"num_tokens": 22592328.0,
"reward": 1.6692111492156982,
"reward_std": 0.40030843019485474,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/final_brier_reward_step": 0.5803611874580383,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.6433586478233337,
"step": 83
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5767650745021585,
"calib/avg_num_step_conf": 4.78125,
"calib/ece": 0.3348913043478261,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 0.9723320158102767,
"calib/gap": 0.012164740286868314,
"calib/mean_conf": 0.9898320158102766,
"calib/mu_c": 0.9939670658682636,
"calib/mu_w": 0.9818023255813952,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3323221343873518,
"calib/std_conf": 0.06313149307339118,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2190.0,
"completions/max_terminated_length": 2190.0,
"completions/mean_length": 588.890625,
"completions/mean_terminated_length": 595.87353515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 142.0,
"epoch": 0.0896,
"grad_norm": 0.014643252827227116,
"learning_rate": 3.2222222222222227e-06,
"loss": -0.014,
"num_tokens": 22849004.0,
"reward": 1.7984957695007324,
"reward_std": 0.5485701560974121,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.6409984230995178,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.709234893321991,
"step": 84
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5872381847475832,
"calib/avg_num_step_conf": 4.79296875,
"calib/ece": 0.3902511999999999,
"calib/final_conf_rate": 0.9765625,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0023364930182599997,
"calib/mean_conf": 0.9982511999999999,
"calib/mu_c": 0.9991671052631579,
"calib/mu_w": 0.9968306122448979,
"calib/nonempty_final_conf_rate": 0.9765625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3902511999999999,
"calib/std_conf": 0.004816656367232362,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2649.0,
"completions/max_terminated_length": 2649.0,
"completions/mean_length": 625.96875,
"completions/mean_terminated_length": 635.90478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 212.0,
"epoch": 0.09066666666666667,
"grad_norm": 0.01396125741302967,
"learning_rate": 3.1944444444444443e-06,
"loss": -0.0154,
"num_tokens": 23117076.0,
"reward": 1.6774587631225586,
"reward_std": 0.4869632124900818,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.5878802537918091,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.6453918814659119,
"step": 85
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5401567398119123,
"calib/avg_num_step_conf": 4.89453125,
"calib/ece": 0.430001725490196,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0001905611285267783,
"calib/mean_conf": 0.9986291764705881,
"calib/mu_c": 0.9987113793103449,
"calib/mu_w": 0.9985208181818181,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.430001725490196,
"calib/std_conf": 0.003159838372050952,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1341.0,
"completions/max_terminated_length": 1341.0,
"completions/mean_length": 589.5390625,
"completions/mean_terminated_length": 598.8968505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 203.0,
"epoch": 0.09173333333333333,
"grad_norm": 0.0163346566259861,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0001,
"num_tokens": 23373510.0,
"reward": 1.6383399963378906,
"reward_std": 0.5051381587982178,
"rewards/accuracy_reward_step": 0.56640625,
"rewards/final_brier_reward_step": 0.5676577687263489,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6028897762298584,
"step": 86
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5182412060301508,
"calib/avg_num_step_conf": 4.890625,
"calib/ece": 0.20084948192771096,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 0.9959839357429718,
"calib/gap": -0.0019423119597991034,
"calib/mean_conf": 0.9976366305220884,
"calib/mu_c": 0.9972466080402009,
"calib/mu_w": 0.99918892,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.19964466265060252,
"calib/std_conf": 0.019559850423825895,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2501.0,
"completions/max_terminated_length": 2501.0,
"completions/mean_length": 588.63671875,
"completions/mean_terminated_length": 605.1846923828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 335.0,
"epoch": 0.0928,
"grad_norm": 0.015122613869607449,
"learning_rate": 3.138888888888889e-06,
"loss": -0.0381,
"num_tokens": 23629697.0,
"reward": 2.0388693809509277,
"reward_std": 0.32220447063446045,
"rewards/accuracy_reward_step": 0.77734375,
"rewards/final_brier_reward_step": 0.7655643224716187,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.8039764761924744,
"step": 87
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.5836539941511467,
"calib/avg_num_step_conf": 4.8828125,
"calib/ece": 0.2867130517928287,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 0.9960159362549801,
"calib/gap": 0.013177578882561214,
"calib/mean_conf": 0.9958763984063747,
"calib/mu_c": 0.9997089213483147,
"calib/mu_w": 0.9865313424657535,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2867130517928287,
"calib/std_conf": 0.03848135956455916,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 1248.0,
"completions/max_terminated_length": 1248.0,
"completions/mean_length": 608.25390625,
"completions/mean_terminated_length": 622.85205078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 315.0,
"epoch": 0.09386666666666667,
"grad_norm": 0.01662285625934601,
"learning_rate": 3.1111111111111116e-06,
"loss": -0.0363,
"num_tokens": 23895258.0,
"reward": 1.8751245737075806,
"reward_std": 0.4252081513404846,
"rewards/accuracy_reward_step": 0.6953125,
"rewards/final_brier_reward_step": 0.6741741299629211,
"rewards/format_reward_step": 0.94921875,
"rewards/stepwise_brier_reward": 0.7560117244720459,
"step": 88
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.45828840970350404,
"calib/avg_num_step_conf": 5.265625,
"calib/ece": 0.43035649951178867,
"calib/final_conf_rate": 0.9609375,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0004923530909297646,
"calib/mean_conf": 0.9994621905686992,
"calib/mu_c": 0.9996743427135714,
"calib/mu_w": 0.9991819896226416,
"calib/nonempty_final_conf_rate": 0.9609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.43035649951178867,
"calib/std_conf": 0.0021776488690664034,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 2567.0,
"completions/max_terminated_length": 2567.0,
"completions/mean_length": 666.6171875,
"completions/mean_terminated_length": 688.1209716796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 350.0,
"epoch": 0.09493333333333333,
"grad_norm": 0.016230568289756775,
"learning_rate": 3.0833333333333336e-06,
"loss": -0.0306,
"num_tokens": 24174800.0,
"reward": 1.5884817838668823,
"reward_std": 0.45879366993904114,
"rewards/accuracy_reward_step": 0.546875,
"rewards/final_brier_reward_step": 0.5436413288116455,
"rewards/format_reward_step": 0.953125,
"rewards/stepwise_brier_reward": 0.6227859258651733,
"step": 89
},
{
"calib/answer_extract_rate": 0.7890625,
"calib/auroc": 0.5662393162393162,
"calib/avg_num_step_conf": 6.234375,
"calib/ece": 0.35607282648366334,
"calib/final_conf_rate": 0.7890625,
"calib/format_rate": 0.77734375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0004208154250425755,
"calib/mean_conf": 0.9996371829193069,
"calib/mu_c": 0.9997871765361538,
"calib/mu_w": 0.9993663611111112,
"calib/nonempty_final_conf_rate": 0.7890625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.35607282648366334,
"calib/std_conf": 0.0016880916927079351,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.15234375,
"completions/max_length": 3003.0,
"completions/max_terminated_length": 3003.0,
"completions/mean_length": 707.5546875,
"completions/mean_terminated_length": 834.7188720703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 360.0,
"epoch": 0.096,
"grad_norm": 0.024469584226608276,
"learning_rate": 3.055555555555556e-06,
"loss": -0.1823,
"num_tokens": 24459254.0,
"reward": 1.4220082759857178,
"reward_std": 0.8066992163658142,
"rewards/accuracy_reward_step": 0.5078125,
"rewards/final_brier_reward_step": 0.508165717124939,
"rewards/format_reward_step": 0.77734375,
"rewards/stepwise_brier_reward": 0.5783044099807739,
"step": 90
},
{
"calib/answer_extract_rate": 0.859375,
"calib/auroc": 0.4834140218755603,
"calib/avg_num_step_conf": 4.671875,
"calib/ece": 0.352261226244344,
"calib/final_conf_rate": 0.86328125,
"calib/format_rate": 0.84375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0001986480186483286,
"calib/mean_conf": 0.9993200497737558,
"calib/mu_c": 0.999390160839161,
"calib/mu_w": 0.9991915128205127,
"calib/nonempty_final_conf_rate": 0.86328125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.352261226244344,
"calib/std_conf": 0.0024244563587599646,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.09375,
"completions/max_length": 2925.0,
"completions/max_terminated_length": 2925.0,
"completions/mean_length": 723.5625,
"completions/mean_terminated_length": 798.413818359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 392.0,
"epoch": 0.09706666666666666,
"grad_norm": 0.011809024028480053,
"learning_rate": 3.0277777777777776e-06,
"loss": -0.0879,
"num_tokens": 24752198.0,
"reward": 1.5397309064865112,
"reward_std": 0.6513643264770508,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.543455958366394,
"rewards/format_reward_step": 0.84375,
"rewards/stepwise_brier_reward": 0.5764051675796509,
"step": 91
},
{
"calib/answer_extract_rate": 0.8359375,
"calib/auroc": 0.5808421729347476,
"calib/avg_num_step_conf": 4.71484375,
"calib/ece": 0.2820045789252337,
"calib/final_conf_rate": 0.8359375,
"calib/format_rate": 0.8203125,
"calib/frac_conf_gt_0.9": 0.9953271028037384,
"calib/gap": 0.00020097260152163177,
"calib/mean_conf": 0.9969578499532711,
"calib/mu_c": 0.9970151365359478,
"calib/mu_w": 0.9968141639344261,
"calib/nonempty_final_conf_rate": 0.8359375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2820045789252337,
"calib/std_conf": 0.00902268426083969,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.13671875,
"completions/max_length": 2844.0,
"completions/max_terminated_length": 2844.0,
"completions/mean_length": 607.453125,
"completions/mean_terminated_length": 703.6561279296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 389.0,
"epoch": 0.09813333333333334,
"grad_norm": 0.019445186480879784,
"learning_rate": 3e-06,
"loss": -0.1993,
"num_tokens": 25014426.0,
"reward": 1.6243741512298584,
"reward_std": 0.8272405862808228,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.5951131582260132,
"rewards/format_reward_step": 0.8203125,
"rewards/stepwise_brier_reward": 0.6758211851119995,
"step": 92
},
{
"calib/answer_extract_rate": 0.8203125,
"calib/auroc": 0.5516564952048822,
"calib/avg_num_step_conf": 4.57421875,
"calib/ece": 0.28653983276190476,
"calib/final_conf_rate": 0.8203125,
"calib/format_rate": 0.80859375,
"calib/frac_conf_gt_0.9": 0.9952380952380953,
"calib/gap": 0.016762255074106225,
"calib/mean_conf": 0.9913017375238096,
"calib/mu_c": 0.9962505937837839,
"calib/mu_w": 0.9794883387096777,
"calib/nonempty_final_conf_rate": 0.8203125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.28653983276190476,
"calib/std_conf": 0.06881002485183123,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1171875,
"completions/max_length": 3006.0,
"completions/max_terminated_length": 3006.0,
"completions/mean_length": 717.1015625,
"completions/mean_terminated_length": 812.2920532226562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 385.0,
"epoch": 0.0992,
"grad_norm": 0.015961183235049248,
"learning_rate": 2.9722222222222225e-06,
"loss": -0.1268,
"num_tokens": 25303780.0,
"reward": 1.5653667449951172,
"reward_std": 0.7328537106513977,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.5762243866920471,
"rewards/format_reward_step": 0.80859375,
"rewards/stepwise_brier_reward": 0.5993050336837769,
"step": 93
},
{
"calib/answer_extract_rate": 0.87109375,
"calib/auroc": 0.40063063063063065,
"calib/avg_num_step_conf": 4.65234375,
"calib/ece": 0.3247029910267859,
"calib/final_conf_rate": 0.875,
"calib/format_rate": 0.85546875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0019463427693693358,
"calib/mean_conf": 0.994345848169643,
"calib/mu_c": 0.9937028599333331,
"calib/mu_w": 0.9956492027027024,
"calib/nonempty_final_conf_rate": 0.875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3247029910267859,
"calib/std_conf": 0.004928637147614021,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 2633.0,
"completions/max_terminated_length": 2633.0,
"completions/mean_length": 610.27734375,
"completions/mean_terminated_length": 679.2651977539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 346.0,
"epoch": 0.10026666666666667,
"grad_norm": 0.01557290181517601,
"learning_rate": 2.944444444444445e-06,
"loss": -0.1429,
"num_tokens": 25568691.0,
"reward": 1.6035652160644531,
"reward_std": 0.6202777624130249,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.5805128812789917,
"rewards/format_reward_step": 0.85546875,
"rewards/stepwise_brier_reward": 0.6071857213973999,
"step": 94
},
{
"calib/answer_extract_rate": 0.90234375,
"calib/auroc": 0.49102341358484325,
"calib/avg_num_step_conf": 4.7578125,
"calib/ece": 0.3309030172409484,
"calib/final_conf_rate": 0.90625,
"calib/format_rate": 0.89453125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00017896086578106374,
"calib/mean_conf": 0.990385775861638,
"calib/mu_c": 0.9903248366013072,
"calib/mu_w": 0.9905037974670883,
"calib/nonempty_final_conf_rate": 0.90625,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3309030172409484,
"calib/std_conf": 0.0019203123156793884,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.078125,
"completions/max_length": 2909.0,
"completions/max_terminated_length": 2909.0,
"completions/mean_length": 620.13671875,
"completions/mean_terminated_length": 672.690673828125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 330.0,
"epoch": 0.10133333333333333,
"grad_norm": 0.0158828254789114,
"learning_rate": 2.916666666666667e-06,
"loss": -0.1137,
"num_tokens": 25833574.0,
"reward": 1.6486221551895142,
"reward_std": 0.6622723340988159,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.599368691444397,
"rewards/format_reward_step": 0.89453125,
"rewards/stepwise_brier_reward": 0.620120108127594,
"step": 95
},
{
"calib/answer_extract_rate": 0.92578125,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.78125,
"calib/ece": 0.21689075630252097,
"calib/final_conf_rate": 0.9296875,
"calib/format_rate": 0.921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 0.9296875,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.21689075630252097,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05078125,
"completions/max_length": 2941.0,
"completions/max_terminated_length": 2941.0,
"completions/mean_length": 605.23828125,
"completions/mean_terminated_length": 637.6172485351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 262.0,
"epoch": 0.1024,
"grad_norm": 0.014743155799806118,
"learning_rate": 2.888888888888889e-06,
"loss": -0.0544,
"num_tokens": 26094331.0,
"reward": 1.9054555892944336,
"reward_std": 0.5066296458244324,
"rewards/accuracy_reward_step": 0.71875,
"rewards/final_brier_reward_step": 0.7188921570777893,
"rewards/format_reward_step": 0.921875,
"rewards/stepwise_brier_reward": 0.7466801404953003,
"step": 96
},
{
"calib/answer_extract_rate": 0.96875,
"calib/auroc": 0.4942528735632184,
"calib/avg_num_step_conf": 4.8828125,
"calib/ece": 0.33930923694779114,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0002528735632186807,
"calib/mean_conf": 0.9899116465863453,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9897471264367813,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.33930923694779114,
"calib/std_conf": 0.0026586648707911872,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2682.0,
"completions/max_terminated_length": 2682.0,
"completions/mean_length": 574.765625,
"completions/mean_terminated_length": 588.5599975585938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 275.0,
"epoch": 0.10346666666666667,
"grad_norm": 0.016557862982153893,
"learning_rate": 2.861111111111111e-06,
"loss": -0.0397,
"num_tokens": 26346543.0,
"reward": 1.7600922584533691,
"reward_std": 0.4420820474624634,
"rewards/accuracy_reward_step": 0.6328125,
"rewards/final_brier_reward_step": 0.63569176197052,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.6781147718429565,
"step": 97
},
{
"calib/answer_extract_rate": 0.98046875,
"calib/auroc": 0.49673202614379086,
"calib/avg_num_step_conf": 4.890625,
"calib/ece": 0.3802788844621514,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0002614379084966423,
"calib/mean_conf": 0.9898406374501992,
"calib/mu_c": 0.9897385620915033,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3802788844621514,
"calib/std_conf": 0.002519743155512655,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2586.0,
"completions/max_terminated_length": 2586.0,
"completions/mean_length": 603.6953125,
"completions/mean_terminated_length": 613.27783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 309.0,
"epoch": 0.10453333333333334,
"grad_norm": 0.027152279391884804,
"learning_rate": 2.8333333333333335e-06,
"loss": -0.0164,
"num_tokens": 26607273.0,
"reward": 1.6960666179656982,
"reward_std": 0.5141116380691528,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.6012991666793823,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.6439046859741211,
"step": 98
},
{
"calib/answer_extract_rate": 0.97265625,
"calib/auroc": 0.5081300813008129,
"calib/avg_num_step_conf": 4.73828125,
"calib/ece": 0.4836546184738957,
"calib/final_conf_rate": 0.97265625,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0006504065040648932,
"calib/mean_conf": 0.9896787148594378,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.989349593495935,
"calib/nonempty_final_conf_rate": 0.97265625,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.4836546184738957,
"calib/std_conf": 0.003570459561589219,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2906.0,
"completions/max_terminated_length": 2906.0,
"completions/mean_length": 604.25390625,
"completions/mean_terminated_length": 616.2908325195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 317.0,
"epoch": 0.1056,
"grad_norm": 0.013146537356078625,
"learning_rate": 2.805555555555556e-06,
"loss": 0.0024,
"num_tokens": 26867762.0,
"reward": 1.4687217473983765,
"reward_std": 0.42152565717697144,
"rewards/accuracy_reward_step": 0.4921875,
"rewards/final_brier_reward_step": 0.4981667995452881,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.5095323920249939,
"step": 99
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5055555555555555,
"calib/avg_num_step_conf": 4.80859375,
"calib/ece": 0.34397637795275593,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.9960629921259843,
"calib/gap": 0.0010000000000000009,
"calib/mean_conf": 0.9896456692913386,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9889999999999999,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.34397637795275593,
"calib/std_conf": 0.005635974940365421,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1190.0,
"completions/max_terminated_length": 1190.0,
"completions/mean_length": 579.734375,
"completions/mean_terminated_length": 588.9365234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 359.0,
"epoch": 0.10666666666666667,
"grad_norm": 0.01472147461026907,
"learning_rate": 2.7777777777777783e-06,
"loss": -0.0164,
"num_tokens": 27123582.0,
"reward": 1.7837918996810913,
"reward_std": 0.3634037673473358,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.6404097080230713,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6822579503059387,
"step": 100
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5047619047619047,
"calib/avg_num_step_conf": 4.77734375,
"calib/ece": 0.3978823529411765,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.00942857142857123,
"calib/mean_conf": 0.9861176470588234,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9805714285714285,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.3978823529411765,
"calib/std_conf": 0.061874524219624055,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 950.0,
"completions/max_terminated_length": 950.0,
"completions/mean_length": 560.92578125,
"completions/mean_terminated_length": 569.8294067382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 286.0,
"epoch": 0.10773333333333333,
"grad_norm": 0.018577059730887413,
"learning_rate": 2.7500000000000004e-06,
"loss": -0.0101,
"num_tokens": 27374171.0,
"reward": 1.6812111139297485,
"reward_std": 0.45339512825012207,
"rewards/accuracy_reward_step": 0.5859375,
"rewards/final_brier_reward_step": 0.5976362824440002,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6428329944610596,
"step": 101
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5140845070422535,
"calib/avg_num_step_conf": 4.9765625,
"calib/ece": 0.26726562500000006,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0002816901408453365,
"calib/mean_conf": 0.9899218750000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9897183098591547,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26726562500000006,
"calib/std_conf": 0.0008804240366863011,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 981.0,
"completions/max_terminated_length": 981.0,
"completions/mean_length": 532.2734375,
"completions/mean_terminated_length": 540.7222290039062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 304.0,
"epoch": 0.1088,
"grad_norm": 0.022051550447940826,
"learning_rate": 2.7222222222222224e-06,
"loss": -0.0047,
"num_tokens": 27617129.0,
"reward": 1.951385259628296,
"reward_std": 0.3563922047615051,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.724351167678833,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7530649900436401,
"step": 102
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5048543689320388,
"calib/avg_num_step_conf": 4.7890625,
"calib/ece": 0.39388235294117646,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 9.708737864111416e-05,
"calib/mean_conf": 0.9899607843137255,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.989902912621359,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.39388235294117646,
"calib/std_conf": 0.0006249951941376175,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1021.0,
"completions/max_terminated_length": 1021.0,
"completions/mean_length": 568.23828125,
"completions/mean_terminated_length": 577.2579956054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 251.0,
"epoch": 0.10986666666666667,
"grad_norm": 0.013331396505236626,
"learning_rate": 2.6944444444444444e-06,
"loss": -0.024,
"num_tokens": 27867150.0,
"reward": 1.6928791999816895,
"reward_std": 0.3353043496608734,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.5938847064971924,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6463817358016968,
"step": 103
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5004629629629629,
"calib/avg_num_step_conf": 4.87109375,
"calib/ece": 0.4602745098039216,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 3.7037037036835585e-05,
"calib/mean_conf": 0.989686274509804,
"calib/mu_c": 0.9897037037037036,
"calib/mu_w": 0.9896666666666668,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4602745098039216,
"calib/std_conf": 0.003528540197396707,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1184.0,
"completions/max_terminated_length": 1184.0,
"completions/mean_length": 550.453125,
"completions/mean_terminated_length": 559.1904907226562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 307.0,
"epoch": 0.11093333333333333,
"grad_norm": 0.019555984064936638,
"learning_rate": 2.666666666666667e-06,
"loss": -0.0098,
"num_tokens": 28114746.0,
"reward": 1.5784821510314941,
"reward_std": 0.47673073410987854,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.5368350744247437,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6286554932594299,
"step": 104
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.73828125,
"calib/ece": 0.3337500000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.3337500000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1196.0,
"completions/max_terminated_length": 1196.0,
"completions/mean_length": 579.578125,
"completions/mean_terminated_length": 588.77783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 334.0,
"epoch": 0.112,
"grad_norm": 0.01764088310301304,
"learning_rate": 2.6388888888888893e-06,
"loss": -0.0063,
"num_tokens": 28368878.0,
"reward": 1.8065965175628662,
"reward_std": 0.5221689343452454,
"rewards/accuracy_reward_step": 0.65625,
"rewards/final_brier_reward_step": 0.6587303876876831,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.6770305633544922,
"step": 105
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.859375,
"calib/ece": 0.32725490196078433,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 3.3306690738754696e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.32725490196078433,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 970.0,
"completions/max_terminated_length": 970.0,
"completions/mean_length": 557.8984375,
"completions/mean_terminated_length": 566.7540283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 351.0,
"epoch": 0.11306666666666666,
"grad_norm": 0.030452396720647812,
"learning_rate": 2.6111111111111113e-06,
"loss": -0.0206,
"num_tokens": 28616284.0,
"reward": 1.8300036191940308,
"reward_std": 0.255470335483551,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/final_brier_reward_step": 0.6628695130348206,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7118322849273682,
"step": 106
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.82421875,
"calib/ece": 0.3390196078431372,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3390196078431372,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 959.0,
"completions/max_terminated_length": 959.0,
"completions/mean_length": 577.65625,
"completions/mean_terminated_length": 586.825439453125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 302.0,
"epoch": 0.11413333333333334,
"grad_norm": 0.01888842135667801,
"learning_rate": 2.5833333333333337e-06,
"loss": -0.0032,
"num_tokens": 28868780.0,
"reward": 1.8138813972473145,
"reward_std": 0.4561561942100525,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/final_brier_reward_step": 0.6551355123519897,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7332028150558472,
"step": 107
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5075757575757576,
"calib/avg_num_step_conf": 4.5234375,
"calib/ece": 0.24765625000000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0006060606060603879,
"calib/mean_conf": 0.9898437500000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9893939393939394,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.24765625000000013,
"calib/std_conf": 0.0024951124097923947,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1136.0,
"completions/max_terminated_length": 1136.0,
"completions/mean_length": 606.58203125,
"completions/mean_terminated_length": 616.2103271484375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 100.0,
"epoch": 0.1152,
"grad_norm": 0.0432201586663723,
"learning_rate": 2.5555555555555557e-06,
"loss": 0.0091,
"num_tokens": 29127297.0,
"reward": 1.9844746589660645,
"reward_std": 0.38650619983673096,
"rewards/accuracy_reward_step": 0.7421875,
"rewards/final_brier_reward_step": 0.7396574020385742,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7685538530349731,
"step": 108
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5042016806722689,
"calib/avg_num_step_conf": 4.3359375,
"calib/ece": 0.4583464566929134,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00033613445378166684,
"calib/mean_conf": 0.9898425196850394,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9896638655462182,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.4583464566929134,
"calib/std_conf": 0.0025048777512735247,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2622.0,
"completions/max_terminated_length": 2622.0,
"completions/mean_length": 608.46875,
"completions/mean_terminated_length": 618.1270141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 179.0,
"epoch": 0.11626666666666667,
"grad_norm": 0.027083788067102432,
"learning_rate": 2.5277777777777778e-06,
"loss": -0.0143,
"num_tokens": 29387665.0,
"reward": 1.5690433979034424,
"reward_std": 0.3326517939567566,
"rewards/accuracy_reward_step": 0.52734375,
"rewards/final_brier_reward_step": 0.5328608751296997,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6105002164840698,
"step": 109
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.2578125,
"calib/ece": 0.2634375000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2634375000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1095.0,
"completions/max_terminated_length": 1095.0,
"completions/mean_length": 558.4375,
"completions/mean_terminated_length": 567.3016357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 245.0,
"epoch": 0.11733333333333333,
"grad_norm": 0.02330888621509075,
"learning_rate": 2.5e-06,
"loss": -0.0048,
"num_tokens": 29635545.0,
"reward": 1.9650754928588867,
"reward_std": 0.2993090748786926,
"rewards/accuracy_reward_step": 0.7265625,
"rewards/final_brier_reward_step": 0.7280253171920776,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7807142734527588,
"step": 110
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 3.63671875,
"calib/ece": 0.2868750000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2868750000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1105.0,
"completions/max_terminated_length": 1105.0,
"completions/mean_length": 582.09765625,
"completions/mean_terminated_length": 591.3373413085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.1184,
"grad_norm": 0.022671138867735863,
"learning_rate": 2.4722222222222226e-06,
"loss": 0.0106,
"num_tokens": 29891970.0,
"reward": 1.9075825214385986,
"reward_std": 0.394694060087204,
"rewards/accuracy_reward_step": 0.703125,
"rewards/final_brier_reward_step": 0.7009952664375305,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7418348789215088,
"step": 111
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5052083333333333,
"calib/avg_num_step_conf": 2.2734375,
"calib/ece": 0.36779527559055125,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00041666666666639873,
"calib/mean_conf": 0.9898425196850394,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9895833333333334,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.96484375,
"calib/pce": 0.36779527559055125,
"calib/std_conf": 0.0025048777512735247,
"calib/step_conf_rate": 0.96484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 1046.0,
"completions/max_terminated_length": 1046.0,
"completions/mean_length": 521.0546875,
"completions/mean_terminated_length": 527.2332153320312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 191.0,
"epoch": 0.11946666666666667,
"grad_norm": 0.021626276895403862,
"learning_rate": 2.4444444444444447e-06,
"loss": 0.0012,
"num_tokens": 30133280.0,
"reward": 1.7234654426574707,
"reward_std": 0.3433002233505249,
"rewards/accuracy_reward_step": 0.6171875,
"rewards/final_brier_reward_step": 0.6124820113182068,
"rewards/format_reward_step": 0.9609375,
"rewards/stepwise_brier_reward": 0.6563798189163208,
"step": 112
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.49609375,
"calib/ece": 0.3381781376518217,
"calib/final_conf_rate": 0.96484375,
"calib/format_rate": 0.953125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9899999999999999,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.96484375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3381781376518217,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1766.0,
"completions/max_terminated_length": 1766.0,
"completions/mean_length": 424.765625,
"completions/mean_terminated_length": 431.5079650878906,
"completions/min_length": 0.0,
"completions/min_terminated_length": 137.0,
"epoch": 0.12053333333333334,
"grad_norm": 0.03900357708334923,
"learning_rate": 2.4166666666666667e-06,
"loss": 0.0037,
"num_tokens": 30347220.0,
"reward": 1.7606676816940308,
"reward_std": 0.4159401059150696,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.6276389956474304,
"rewards/format_reward_step": 0.953125,
"rewards/stepwise_brier_reward": 0.6650315523147583,
"step": 113
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5073529411764706,
"calib/avg_num_step_conf": 1.91796875,
"calib/ece": 0.2607569721115538,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0005882352941175562,
"calib/mean_conf": 0.9898406374501992,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9894117647058822,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2607569721115538,
"calib/std_conf": 0.0025197431555126553,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 999.0,
"completions/max_terminated_length": 999.0,
"completions/mean_length": 442.65625,
"completions/mean_terminated_length": 449.68255615234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 192.0,
"epoch": 0.1216,
"grad_norm": 0.02818751521408558,
"learning_rate": 2.388888888888889e-06,
"loss": -0.0079,
"num_tokens": 30565564.0,
"reward": 1.9415286779403687,
"reward_std": 0.31948792934417725,
"rewards/accuracy_reward_step": 0.73046875,
"rewards/final_brier_reward_step": 0.7203612923622131,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7020031809806824,
"step": 114
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 1.98828125,
"calib/ece": 0.30496062992125983,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.30496062992125983,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 812.0,
"completions/max_terminated_length": 812.0,
"completions/mean_length": 482.14453125,
"completions/mean_terminated_length": 489.7976379394531,
"completions/min_length": 0.0,
"completions/min_terminated_length": 255.0,
"epoch": 0.12266666666666666,
"grad_norm": 0.018928449600934982,
"learning_rate": 2.361111111111111e-06,
"loss": -0.0046,
"num_tokens": 30794257.0,
"reward": 1.8706040382385254,
"reward_std": 0.28297024965286255,
"rewards/accuracy_reward_step": 0.6875,
"rewards/final_brier_reward_step": 0.6818546652793884,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.706811249256134,
"step": 115
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 2.19921875,
"calib/ece": 0.27740157480314964,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.27740157480314964,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1201.0,
"completions/max_terminated_length": 1201.0,
"completions/mean_length": 551.5390625,
"completions/mean_terminated_length": 562.5259399414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 223.0,
"epoch": 0.12373333333333333,
"grad_norm": 0.026971513405442238,
"learning_rate": 2.3333333333333336e-06,
"loss": -0.0107,
"num_tokens": 31039971.0,
"reward": 1.9170682430267334,
"reward_std": 0.36526399850845337,
"rewards/accuracy_reward_step": 0.70703125,
"rewards/final_brier_reward_step": 0.7047457098960876,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7604023218154907,
"step": 116
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 2.35546875,
"calib/ece": 0.4252941176470588,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.4252941176470588,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2653.0,
"completions/max_terminated_length": 2653.0,
"completions/mean_length": 537.69140625,
"completions/mean_terminated_length": 544.0671997070312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 218.0,
"epoch": 0.1248,
"grad_norm": 0.020982950925827026,
"learning_rate": 2.305555555555556e-06,
"loss": -0.0025,
"num_tokens": 31284220.0,
"reward": 1.643857717514038,
"reward_std": 0.2697311341762543,
"rewards/accuracy_reward_step": 0.5625,
"rewards/final_brier_reward_step": 0.5707613229751587,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6687314510345459,
"step": 117
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 3.91015625,
"calib/ece": 0.3493750000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3493750000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1261.0,
"completions/max_terminated_length": 1261.0,
"completions/mean_length": 660.16796875,
"completions/mean_terminated_length": 670.6468505859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 305.0,
"epoch": 0.12586666666666665,
"grad_norm": 0.018190357834100723,
"learning_rate": 2.277777777777778e-06,
"loss": 0.0059,
"num_tokens": 31557231.0,
"reward": 1.787449598312378,
"reward_std": 0.31239908933639526,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.6437289118766785,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6779447793960571,
"step": 118
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5120481927710843,
"calib/avg_num_step_conf": 4.29296875,
"calib/ece": 0.3151764705882353,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0009638554216867545,
"calib/mean_conf": 0.989686274509804,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9890361445783131,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.3151764705882353,
"calib/std_conf": 0.0035285401973967068,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2498.0,
"completions/max_terminated_length": 2498.0,
"completions/mean_length": 721.1015625,
"completions/mean_terminated_length": 729.6522216796875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 369.0,
"epoch": 0.12693333333333334,
"grad_norm": 0.016771312803030014,
"learning_rate": 2.25e-06,
"loss": 0.0116,
"num_tokens": 31846897.0,
"reward": 1.8449645042419434,
"reward_std": 0.3058851957321167,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.6744238138198853,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.7132465839385986,
"step": 119
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5063291139240507,
"calib/avg_num_step_conf": 4.79296875,
"calib/ece": 0.2963137254901961,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.011265822784810697,
"calib/mean_conf": 0.9865098039215686,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9787341772151894,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2963137254901961,
"calib/std_conf": 0.05562457227824789,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1411.0,
"completions/max_terminated_length": 1411.0,
"completions/mean_length": 716.73828125,
"completions/mean_terminated_length": 728.1151123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 375.0,
"epoch": 0.128,
"grad_norm": 0.015937916934490204,
"learning_rate": 2.222222222222222e-06,
"loss": -0.0076,
"num_tokens": 32137070.0,
"reward": 1.8887319564819336,
"reward_std": 0.39160123467445374,
"rewards/accuracy_reward_step": 0.6875,
"rewards/final_brier_reward_step": 0.6934167742729187,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7599482536315918,
"step": 120
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.9296875,
"calib/ece": 0.3142187500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3142187500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1378.0,
"completions/max_terminated_length": 1378.0,
"completions/mean_length": 765.953125,
"completions/mean_terminated_length": 778.1111450195312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 406.0,
"epoch": 0.12906666666666666,
"grad_norm": 0.014766894280910492,
"learning_rate": 2.1944444444444445e-06,
"loss": -0.0073,
"num_tokens": 32438210.0,
"reward": 1.8706910610198975,
"reward_std": 0.38398224115371704,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/final_brier_reward_step": 0.6821656227111816,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7459110021591187,
"step": 121
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.8203125,
"calib/ece": 0.26952755905511805,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.26952755905511805,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1984.0,
"completions/max_terminated_length": 1984.0,
"completions/mean_length": 771.515625,
"completions/mean_terminated_length": 786.8844604492188,
"completions/min_length": 0.0,
"completions/min_terminated_length": 441.0,
"epoch": 0.13013333333333332,
"grad_norm": 0.016249533742666245,
"learning_rate": 2.166666666666667e-06,
"loss": -0.0174,
"num_tokens": 32743062.0,
"reward": 1.937490701675415,
"reward_std": 0.2781732380390167,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/final_brier_reward_step": 0.7201359272003174,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7720146179199219,
"step": 122
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 4.98828125,
"calib/ece": 0.3154901960784313,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3154901960784313,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1635.0,
"completions/max_terminated_length": 1635.0,
"completions/mean_length": 782.2265625,
"completions/mean_terminated_length": 794.6428833007812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 414.0,
"epoch": 0.1312,
"grad_norm": 0.015606064349412918,
"learning_rate": 2.138888888888889e-06,
"loss": -0.0121,
"num_tokens": 33048600.0,
"reward": 1.850453495979309,
"reward_std": 0.41477206349372864,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.6782597303390503,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7001171112060547,
"step": 123
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 5.00390625,
"calib/ece": 0.2537795275590551,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2537795275590551,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 3063.0,
"completions/max_terminated_length": 3063.0,
"completions/mean_length": 758.68359375,
"completions/mean_terminated_length": 767.6798706054688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 386.0,
"epoch": 0.13226666666666667,
"grad_norm": 0.01349243987351656,
"learning_rate": 2.1111111111111114e-06,
"loss": -0.0228,
"num_tokens": 33349639.0,
"reward": 1.9734143018722534,
"reward_std": 0.2765697240829468,
"rewards/accuracy_reward_step": 0.73046875,
"rewards/final_brier_reward_step": 0.7355260848999023,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.798755943775177,
"step": 124
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 5.484375,
"calib/ece": 0.4079687500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.4079687500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1572.0,
"completions/max_terminated_length": 1572.0,
"completions/mean_length": 790.0234375,
"completions/mean_terminated_length": 802.5635375976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 409.0,
"epoch": 0.13333333333333333,
"grad_norm": 0.01633782498538494,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.0061,
"num_tokens": 33656693.0,
"reward": 1.6857736110687256,
"reward_std": 0.39842092990875244,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.5902905464172363,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.6606166362762451,
"step": 125
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5053763440860215,
"calib/avg_num_step_conf": 5.95703125,
"calib/ece": 0.35598425196850403,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00043010752688177334,
"calib/mean_conf": 0.9898425196850394,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9895698924731181,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.35598425196850403,
"calib/std_conf": 0.0025048777512735247,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1829.0,
"completions/max_terminated_length": 1829.0,
"completions/mean_length": 814.25390625,
"completions/mean_terminated_length": 827.1785888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 425.0,
"epoch": 0.1344,
"grad_norm": 0.014358713291585445,
"learning_rate": 2.0555555555555555e-06,
"loss": -0.0459,
"num_tokens": 33970606.0,
"reward": 1.7619569301605225,
"reward_std": 0.3340670168399811,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/final_brier_reward_step": 0.6362203359603882,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6694203615188599,
"step": 126
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 5.66015625,
"calib/ece": 0.3987301587301588,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 3.3306690738754696e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3987301587301588,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2625.0,
"completions/max_terminated_length": 2625.0,
"completions/mean_length": 825.26171875,
"completions/mean_terminated_length": 835.0474853515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 484.0,
"epoch": 0.13546666666666668,
"grad_norm": 0.016708409413695335,
"learning_rate": 2.027777777777778e-06,
"loss": 0.0111,
"num_tokens": 34285545.0,
"reward": 1.671630620956421,
"reward_std": 0.4748235046863556,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.5899796485900879,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6356050968170166,
"step": 127
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.49832301341589263,
"calib/avg_num_step_conf": 6.03515625,
"calib/ece": 0.39169291338582685,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9921259842519685,
"calib/gap": 2.0639834881541752e-05,
"calib/mean_conf": 0.9881496062992127,
"calib/mu_c": 0.988157894736842,
"calib/mu_w": 0.9881372549019605,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.39070866141732286,
"calib/std_conf": 0.01928141316257154,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1477.0,
"completions/max_terminated_length": 1477.0,
"completions/mean_length": 827.19140625,
"completions/mean_terminated_length": 840.3214721679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 413.0,
"epoch": 0.13653333333333334,
"grad_norm": 0.0164869986474514,
"learning_rate": 2.0000000000000003e-06,
"loss": -0.017,
"num_tokens": 34603970.0,
"reward": 1.705299973487854,
"reward_std": 0.37409543991088867,
"rewards/accuracy_reward_step": 0.59375,
"rewards/final_brier_reward_step": 0.6026171445846558,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6795204281806946,
"step": 128
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5067567567567568,
"calib/avg_num_step_conf": 7.16796875,
"calib/ece": 0.28233201581027667,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.000540540540540424,
"calib/mean_conf": 0.9898418972332016,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9894594594594595,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.28233201581027667,
"calib/std_conf": 0.00250980361523914,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1890.0,
"completions/max_terminated_length": 1890.0,
"completions/mean_length": 822.20703125,
"completions/mean_terminated_length": 838.585693359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 547.0,
"epoch": 0.1376,
"grad_norm": 0.01689060591161251,
"learning_rate": 1.9722222222222224e-06,
"loss": -0.0346,
"num_tokens": 34916839.0,
"reward": 1.9011118412017822,
"reward_std": 0.37755003571510315,
"rewards/accuracy_reward_step": 0.69921875,
"rewards/final_brier_reward_step": 0.7051265239715576,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7352585196495056,
"step": 129
},
{
"calib/answer_extract_rate": 0.953125,
"calib/auroc": 0.5304878048780488,
"calib/avg_num_step_conf": 8.2421875,
"calib/ece": 0.32387755102040816,
"calib/final_conf_rate": 0.95703125,
"calib/format_rate": 0.94921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0024390243902440156,
"calib/mean_conf": 0.9891836734693877,
"calib/mu_c": 0.99,
"calib/mu_w": 0.987560975609756,
"calib/nonempty_final_conf_rate": 0.95703125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.32387755102040816,
"calib/std_conf": 0.00565567610634736,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 3010.0,
"completions/max_terminated_length": 3010.0,
"completions/mean_length": 890.4375,
"completions/mean_terminated_length": 926.6340942382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 553.0,
"epoch": 0.13866666666666666,
"grad_norm": 0.011856338940560818,
"learning_rate": 1.944444444444445e-06,
"loss": -0.0684,
"num_tokens": 35250079.0,
"reward": 1.772405743598938,
"reward_std": 0.3086838722229004,
"rewards/accuracy_reward_step": 0.63671875,
"rewards/final_brier_reward_step": 0.6405613422393799,
"rewards/format_reward_step": 0.94921875,
"rewards/stepwise_brier_reward": 0.7303117513656616,
"step": 130
},
{
"calib/answer_extract_rate": 0.9140625,
"calib/auroc": 0.5043103448275862,
"calib/avg_num_step_conf": 10.23828125,
"calib/ece": 0.4813559322033898,
"calib/final_conf_rate": 0.921875,
"calib/format_rate": 0.8984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0003448275862071304,
"calib/mean_conf": 0.9898305084745762,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.989655172413793,
"calib/nonempty_final_conf_rate": 0.921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.98046875,
"calib/pce": 0.4813559322033898,
"calib/std_conf": 0.002598255884195915,
"calib/step_conf_rate": 0.98046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0703125,
"completions/max_length": 2647.0,
"completions/max_terminated_length": 2647.0,
"completions/mean_length": 1048.74609375,
"completions/mean_terminated_length": 1128.0631103515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 619.0,
"epoch": 0.13973333333333332,
"grad_norm": 0.012333076447248459,
"learning_rate": 1.916666666666667e-06,
"loss": -0.0836,
"num_tokens": 35624766.0,
"reward": 1.3999123573303223,
"reward_std": 0.5030518770217896,
"rewards/accuracy_reward_step": 0.46875,
"rewards/final_brier_reward_step": 0.4699007570743561,
"rewards/format_reward_step": 0.8984375,
"rewards/stepwise_brier_reward": 0.5203734040260315,
"step": 131
},
{
"calib/answer_extract_rate": 0.71875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 14.734375,
"calib/ece": 0.23193548387096763,
"calib/final_conf_rate": 0.7265625,
"calib/format_rate": 0.71484375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -4.440892098500626e-16,
"calib/mean_conf": 0.9899999999999999,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9900000000000002,
"calib/nonempty_final_conf_rate": 0.7265625,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.23193548387096763,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.24609375,
"completions/max_length": 2778.0,
"completions/max_terminated_length": 2778.0,
"completions/mean_length": 989.8671875,
"completions/mean_terminated_length": 1312.984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 750.0,
"epoch": 0.1408,
"grad_norm": 0.01676846109330654,
"learning_rate": 1.888888888888889e-06,
"loss": -0.2793,
"num_tokens": 35983764.0,
"reward": 1.461935043334961,
"reward_std": 0.9555952548980713,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.5501629114151001,
"rewards/format_reward_step": 0.71484375,
"rewards/stepwise_brier_reward": 0.5632020235061646,
"step": 132
},
{
"calib/answer_extract_rate": 0.89453125,
"calib/auroc": 0.5052631578947369,
"calib/avg_num_step_conf": 11.91796875,
"calib/ece": 0.40052401746724886,
"calib/final_conf_rate": 0.89453125,
"calib/format_rate": 0.890625,
"calib/frac_conf_gt_0.9": 0.9956331877729258,
"calib/gap": 0.01042105263157933,
"calib/mean_conf": 0.9856768558951965,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9795789473684207,
"calib/nonempty_final_conf_rate": 0.89453125,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.40052401746724886,
"calib/std_conf": 0.0652780444621663,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1015625,
"completions/max_length": 3065.0,
"completions/max_terminated_length": 3065.0,
"completions/mean_length": 1199.4765625,
"completions/mean_terminated_length": 1335.0694580078125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 832.0,
"epoch": 0.14186666666666667,
"grad_norm": 0.011118494905531406,
"learning_rate": 1.8611111111111113e-06,
"loss": -0.1199,
"num_tokens": 36397174.0,
"reward": 1.5080835819244385,
"reward_std": 0.6873135566711426,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.53452068567276,
"rewards/format_reward_step": 0.890625,
"rewards/stepwise_brier_reward": 0.575938880443573,
"step": 133
},
{
"calib/answer_extract_rate": 0.94140625,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 10.70703125,
"calib/ece": 0.40975308641975305,
"calib/final_conf_rate": 0.94921875,
"calib/format_rate": 0.9375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.94921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.40975308641975305,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0390625,
"completions/max_length": 2778.0,
"completions/max_terminated_length": 2778.0,
"completions/mean_length": 1273.45703125,
"completions/mean_terminated_length": 1325.2235107421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 609.0,
"epoch": 0.14293333333333333,
"grad_norm": 0.009259031154215336,
"learning_rate": 1.8333333333333333e-06,
"loss": -0.0652,
"num_tokens": 36832131.0,
"reward": 1.5892717838287354,
"reward_std": 0.6638240814208984,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.5584218502044678,
"rewards/format_reward_step": 0.9375,
"rewards/stepwise_brier_reward": 0.6189777851104736,
"step": 134
},
{
"calib/answer_extract_rate": 0.9765625,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 10.3046875,
"calib/ece": 0.2927888446215139,
"calib/final_conf_rate": 0.98046875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.98046875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2927888446215139,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2527.0,
"completions/max_terminated_length": 2527.0,
"completions/mean_length": 1229.83984375,
"completions/mean_terminated_length": 1259.3560791015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 763.0,
"epoch": 0.144,
"grad_norm": 0.009267253801226616,
"learning_rate": 1.8055555555555557e-06,
"loss": -0.0236,
"num_tokens": 37252850.0,
"reward": 1.8674653768539429,
"reward_std": 0.46042320132255554,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/final_brier_reward_step": 0.6893554329872131,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.7258185148239136,
"step": 135
},
{
"calib/answer_extract_rate": 0.9609375,
"calib/auroc": 0.4917748917748918,
"calib/avg_num_step_conf": 9.94921875,
"calib/ece": 0.413266129032258,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.95703125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0003749583749584895,
"calib/mean_conf": 0.9898790322580645,
"calib/mu_c": 0.9897202797202795,
"calib/mu_w": 0.990095238095238,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.413266129032258,
"calib/std_conf": 0.002615378656158313,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.02734375,
"completions/max_length": 2607.0,
"completions/max_terminated_length": 2607.0,
"completions/mean_length": 1216.07421875,
"completions/mean_terminated_length": 1250.260986328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 747.0,
"epoch": 0.14506666666666668,
"grad_norm": 0.011020061559975147,
"learning_rate": 1.777777777777778e-06,
"loss": -0.0074,
"num_tokens": 37672653.0,
"reward": 1.6056492328643799,
"reward_std": 0.4844512939453125,
"rewards/accuracy_reward_step": 0.55859375,
"rewards/final_brier_reward_step": 0.5663796663284302,
"rewards/format_reward_step": 0.95703125,
"rewards/stepwise_brier_reward": 0.590592622756958,
"step": 136
},
{
"calib/answer_extract_rate": 0.96484375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 9.38671875,
"calib/ece": 0.29241935483870973,
"calib/final_conf_rate": 0.96875,
"calib/format_rate": 0.96484375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.96875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.29241935483870973,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03125,
"completions/max_length": 2941.0,
"completions/max_terminated_length": 2941.0,
"completions/mean_length": 1233.16796875,
"completions/mean_terminated_length": 1272.947509765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 798.0,
"epoch": 0.14613333333333334,
"grad_norm": 0.007238389924168587,
"learning_rate": 1.75e-06,
"loss": -0.0266,
"num_tokens": 38095328.0,
"reward": 1.8489694595336914,
"reward_std": 0.34204357862472534,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/final_brier_reward_step": 0.6814659833908081,
"rewards/format_reward_step": 0.96484375,
"rewards/stepwise_brier_reward": 0.7300364375114441,
"step": 137
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 8.7421875,
"calib/ece": 0.35862745098039217,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.35862745098039217,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2713.0,
"completions/max_terminated_length": 2713.0,
"completions/mean_length": 1092.24609375,
"completions/mean_terminated_length": 1105.1976318359375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 705.0,
"epoch": 0.1472,
"grad_norm": 0.010518020950257778,
"learning_rate": 1.7222222222222224e-06,
"loss": 0.0063,
"num_tokens": 38479279.0,
"reward": 1.7698776721954346,
"reward_std": 0.3144071698188782,
"rewards/accuracy_reward_step": 0.62890625,
"rewards/final_brier_reward_step": 0.6321667432785034,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6973439455032349,
"step": 138
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5028735632183908,
"calib/avg_num_step_conf": 7.91015625,
"calib/ece": 0.30499999999999994,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 5.747126436772554e-05,
"calib/mean_conf": 0.9900393700787401,
"calib/mu_c": 0.9900574712643677,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30499999999999994,
"calib/std_conf": 0.0006262194378183812,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1907.0,
"completions/max_terminated_length": 1907.0,
"completions/mean_length": 956.609375,
"completions/mean_terminated_length": 975.6653442382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 707.0,
"epoch": 0.14826666666666666,
"grad_norm": 0.010206053964793682,
"learning_rate": 1.6944444444444446e-06,
"loss": -0.0121,
"num_tokens": 38827267.0,
"reward": 1.8824926614761353,
"reward_std": 0.32546108961105347,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/final_brier_reward_step": 0.6858386993408203,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.781632125377655,
"step": 139
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5076923076923077,
"calib/avg_num_step_conf": 7.37109375,
"calib/ece": 0.24675889328063239,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0006153846153845732,
"calib/mean_conf": 0.9898418972332016,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9893846153846154,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.24675889328063239,
"calib/std_conf": 0.00250980361523914,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2020.0,
"completions/max_terminated_length": 2020.0,
"completions/mean_length": 963.3359375,
"completions/mean_terminated_length": 982.5259399414062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 637.0,
"epoch": 0.14933333333333335,
"grad_norm": 0.010636700317263603,
"learning_rate": 1.6666666666666667e-06,
"loss": -0.0251,
"num_tokens": 39178897.0,
"reward": 1.9774314165115356,
"reward_std": 0.34751075506210327,
"rewards/accuracy_reward_step": 0.734375,
"rewards/final_brier_reward_step": 0.7396574020385742,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7872557044029236,
"step": 140
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 7.05859375,
"calib/ece": 0.2634375000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2634375000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1658.0,
"completions/max_terminated_length": 1658.0,
"completions/mean_length": 953.55078125,
"completions/mean_terminated_length": 968.6865844726562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 602.0,
"epoch": 0.1504,
"grad_norm": 0.008473373018205166,
"learning_rate": 1.638888888888889e-06,
"loss": 0.0128,
"num_tokens": 39530102.0,
"reward": 1.9683520793914795,
"reward_std": 0.2899768352508545,
"rewards/accuracy_reward_step": 0.7265625,
"rewards/final_brier_reward_step": 0.731931209564209,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.782101571559906,
"step": 141
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5052083333333333,
"calib/avg_num_step_conf": 6.81640625,
"calib/ece": 0.3707936507936509,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0004166666666666208,
"calib/mean_conf": 0.98984126984127,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9895833333333334,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3707936507936509,
"calib/std_conf": 0.0025147586536118844,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1816.0,
"completions/max_terminated_length": 1816.0,
"completions/mean_length": 939.69140625,
"completions/mean_terminated_length": 958.410400390625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 600.0,
"epoch": 0.15146666666666667,
"grad_norm": 0.011722843162715435,
"learning_rate": 1.6111111111111113e-06,
"loss": -0.0243,
"num_tokens": 39875823.0,
"reward": 1.7259165048599243,
"reward_std": 0.4010692834854126,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.6166988015174866,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.6697796583175659,
"step": 142
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.80859375,
"calib/ece": 0.3181250000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3181250000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2091.0,
"completions/max_terminated_length": 2091.0,
"completions/mean_length": 901.0390625,
"completions/mean_terminated_length": 915.34130859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 564.0,
"epoch": 0.15253333333333333,
"grad_norm": 0.010479445569217205,
"learning_rate": 1.5833333333333333e-06,
"loss": -0.0031,
"num_tokens": 40213825.0,
"reward": 1.8537890911102295,
"reward_std": 0.26736265420913696,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.6783374547958374,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7055689096450806,
"step": 143
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.6953125,
"calib/ece": 0.21745098039215682,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.21745098039215682,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1860.0,
"completions/max_terminated_length": 1860.0,
"completions/mean_length": 885.05078125,
"completions/mean_terminated_length": 899.0992431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 520.0,
"epoch": 0.1536,
"grad_norm": 0.010390263050794601,
"learning_rate": 1.5555555555555558e-06,
"loss": -0.0118,
"num_tokens": 40544526.0,
"reward": 2.0450034141540527,
"reward_std": 0.3080452084541321,
"rewards/accuracy_reward_step": 0.76953125,
"rewards/final_brier_reward_step": 0.7738851308822632,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8045663833618164,
"step": 144
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5149253731343284,
"calib/avg_num_step_conf": 6.59375,
"calib/ece": 0.2524313725490196,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0011940298507464586,
"calib/mean_conf": 0.989686274509804,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9888059701492535,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2524313725490196,
"calib/std_conf": 0.0035285401973967063,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1245.0,
"completions/max_terminated_length": 1245.0,
"completions/mean_length": 838.34765625,
"completions/mean_terminated_length": 851.65478515625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 519.0,
"epoch": 0.15466666666666667,
"grad_norm": 0.010068155825138092,
"learning_rate": 1.527777777777778e-06,
"loss": -0.0184,
"num_tokens": 40861847.0,
"reward": 1.9680100679397583,
"reward_std": 0.4435754418373108,
"rewards/accuracy_reward_step": 0.734375,
"rewards/final_brier_reward_step": 0.7401160001754761,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7334867715835571,
"step": 145
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5046728971962617,
"calib/avg_num_step_conf": 6.50390625,
"calib/ece": 0.4078515625000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0002803738317758153,
"calib/mean_conf": 0.9898828125000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9897196261682242,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4078515625000001,
"calib/std_conf": 0.0018713343073442964,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1567.0,
"completions/max_terminated_length": 1567.0,
"completions/mean_length": 868.91796875,
"completions/mean_terminated_length": 882.7103881835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 523.0,
"epoch": 0.15573333333333333,
"grad_norm": 0.011212436482310295,
"learning_rate": 1.5e-06,
"loss": 0.0043,
"num_tokens": 41191506.0,
"reward": 1.6800085306167603,
"reward_std": 0.34910422563552856,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.5904414057731628,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6452174782752991,
"step": 146
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5141509433962264,
"calib/avg_num_step_conf": 6.41796875,
"calib/ece": 0.40688976377952757,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0010377358490565314,
"calib/mean_conf": 0.9895669291338582,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9889622641509433,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.40688976377952757,
"calib/std_conf": 0.003994268632473766,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1298.0,
"completions/max_terminated_length": 1298.0,
"completions/mean_length": 832.86328125,
"completions/mean_terminated_length": 849.4542236328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 453.0,
"epoch": 0.1568,
"grad_norm": 0.012004495598375797,
"learning_rate": 1.4722222222222225e-06,
"loss": -0.0085,
"num_tokens": 41508399.0,
"reward": 1.6718380451202393,
"reward_std": 0.44913479685783386,
"rewards/accuracy_reward_step": 0.578125,
"rewards/final_brier_reward_step": 0.5870640277862549,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6549757719039917,
"step": 147
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5081967213114754,
"calib/avg_num_step_conf": 6.41015625,
"calib/ece": 0.22812500000000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0006557377049178914,
"calib/mean_conf": 0.9898437500000001,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9893442622950822,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.22812500000000013,
"calib/std_conf": 0.0024951124097923947,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1685.0,
"completions/max_terminated_length": 1685.0,
"completions/mean_length": 822.83984375,
"completions/mean_terminated_length": 835.9008178710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 342.0,
"epoch": 0.15786666666666666,
"grad_norm": 0.011262796819210052,
"learning_rate": 1.4444444444444445e-06,
"loss": 0.0045,
"num_tokens": 41824158.0,
"reward": 2.0360844135284424,
"reward_std": 0.37539172172546387,
"rewards/accuracy_reward_step": 0.76171875,
"rewards/final_brier_reward_step": 0.7666874527931213,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8073375821113586,
"step": 148
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.2734375,
"calib/ece": 0.3025000000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3025000000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1493.0,
"completions/max_terminated_length": 1493.0,
"completions/mean_length": 844.06640625,
"completions/mean_terminated_length": 857.46435546875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.15893333333333334,
"grad_norm": 0.012008114717900753,
"learning_rate": 1.4166666666666667e-06,
"loss": 0.0068,
"num_tokens": 42144695.0,
"reward": 1.8902229070663452,
"reward_std": 0.2819676399230957,
"rewards/accuracy_reward_step": 0.6875,
"rewards/final_brier_reward_step": 0.6936500072479248,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7422417998313904,
"step": 149
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.4140625,
"calib/ece": 0.3154901960784313,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3154901960784313,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2325.0,
"completions/max_terminated_length": 2325.0,
"completions/mean_length": 792.0078125,
"completions/mean_terminated_length": 804.5794067382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 419.0,
"epoch": 0.16,
"grad_norm": 0.010181116871535778,
"learning_rate": 1.3888888888888892e-06,
"loss": -0.0149,
"num_tokens": 42452409.0,
"reward": 1.8635663986206055,
"reward_std": 0.21723566949367523,
"rewards/accuracy_reward_step": 0.671875,
"rewards/final_brier_reward_step": 0.6782597303390503,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7525684833526611,
"step": 150
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.12890625,
"calib/ece": 0.3767187500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3767187500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2419.0,
"completions/max_terminated_length": 2419.0,
"completions/mean_length": 858.32421875,
"completions/mean_terminated_length": 871.948486328125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 495.0,
"epoch": 0.16106666666666666,
"grad_norm": 0.010340590961277485,
"learning_rate": 1.3611111111111112e-06,
"loss": 0.0127,
"num_tokens": 42779164.0,
"reward": 1.744431972503662,
"reward_std": 0.3037966191768646,
"rewards/accuracy_reward_step": 0.61328125,
"rewards/final_brier_reward_step": 0.6208378672599792,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.685015082359314,
"step": 151
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.50625,
"calib/avg_num_step_conf": 6.3203125,
"calib/ece": 0.3060474308300395,
"calib/final_conf_rate": 0.98828125,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0004999999999999449,
"calib/mean_conf": 0.9898418972332016,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9894999999999999,
"calib/nonempty_final_conf_rate": 0.98828125,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3060474308300395,
"calib/std_conf": 0.0025098036152391397,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1740.0,
"completions/max_terminated_length": 1740.0,
"completions/mean_length": 831.84375,
"completions/mean_terminated_length": 848.4143676757812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 492.0,
"epoch": 0.16213333333333332,
"grad_norm": 0.011317038908600807,
"learning_rate": 1.3333333333333334e-06,
"loss": -0.0204,
"num_tokens": 43097508.0,
"reward": 1.8634164333343506,
"reward_std": 0.41251468658447266,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/final_brier_reward_step": 0.6822355389595032,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7401804327964783,
"step": 152
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5064102564102564,
"calib/avg_num_step_conf": 6.18359375,
"calib/ece": 0.2921960784313726,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.012051282051282475,
"calib/mean_conf": 0.986313725490196,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9779487179487176,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.2921960784313726,
"calib/std_conf": 0.058749548248935975,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2476.0,
"completions/max_terminated_length": 2476.0,
"completions/mean_length": 858.13671875,
"completions/mean_terminated_length": 868.3123168945312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 486.0,
"epoch": 0.1632,
"grad_norm": 0.009934207424521446,
"learning_rate": 1.3055555555555556e-06,
"loss": 0.0058,
"num_tokens": 43424511.0,
"reward": 1.892085075378418,
"reward_std": 0.27568596601486206,
"rewards/accuracy_reward_step": 0.69140625,
"rewards/final_brier_reward_step": 0.6972448825836182,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7460954189300537,
"step": 153
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.28515625,
"calib/ece": 0.39784313725490195,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -4.440892098500626e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999997,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.39784313725490195,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1861.0,
"completions/max_terminated_length": 1861.0,
"completions/mean_length": 825.82421875,
"completions/mean_terminated_length": 838.9326171875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 431.0,
"epoch": 0.16426666666666667,
"grad_norm": 0.010299614630639553,
"learning_rate": 1.2777777777777779e-06,
"loss": 0.006,
"num_tokens": 43740362.0,
"reward": 1.6944944858551025,
"reward_std": 0.31959229707717896,
"rewards/accuracy_reward_step": 0.58984375,
"rewards/final_brier_reward_step": 0.5978691577911377,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6488587856292725,
"step": 154
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.24609375,
"calib/ece": 0.4056862745098039,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98046875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.4056862745098039,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1397.0,
"completions/max_terminated_length": 1397.0,
"completions/mean_length": 798.93359375,
"completions/mean_terminated_length": 811.6151123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 494.0,
"epoch": 0.16533333333333333,
"grad_norm": 0.011612067930400372,
"learning_rate": 1.25e-06,
"loss": -0.0111,
"num_tokens": 44052105.0,
"reward": 1.667478322982788,
"reward_std": 0.5500566363334656,
"rewards/accuracy_reward_step": 0.58203125,
"rewards/final_brier_reward_step": 0.5860737562179565,
"rewards/format_reward_step": 0.98046875,
"rewards/stepwise_brier_reward": 0.630714476108551,
"step": 155
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5164835164835164,
"calib/avg_num_step_conf": 6.16796875,
"calib/ece": 0.3448437500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 0.99609375,
"calib/gap": 0.0017582417582415744,
"calib/mean_conf": 0.9893750000000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9882417582417582,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3448437500000001,
"calib/std_conf": 0.006404344228724747,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1703.0,
"completions/max_terminated_length": 1703.0,
"completions/mean_length": 809.5703125,
"completions/mean_terminated_length": 822.420654296875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 475.0,
"epoch": 0.1664,
"grad_norm": 0.015950413420796394,
"learning_rate": 1.2222222222222223e-06,
"loss": 0.0128,
"num_tokens": 44364115.0,
"reward": 1.815731406211853,
"reward_std": 0.31029415130615234,
"rewards/accuracy_reward_step": 0.64453125,
"rewards/final_brier_reward_step": 0.6527366638183594,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7430015802383423,
"step": 156
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5096153846153846,
"calib/avg_num_step_conf": 6.2421875,
"calib/ece": 0.1900392156862745,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 0.996078431372549,
"calib/gap": 0.019038461538461338,
"calib/mean_conf": 0.9861176470588234,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9709615384615387,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.1900392156862745,
"calib/std_conf": 0.06187452421962405,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1220.0,
"completions/max_terminated_length": 1220.0,
"completions/mean_length": 780.29296875,
"completions/mean_terminated_length": 792.6785888671875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 435.0,
"epoch": 0.16746666666666668,
"grad_norm": 0.009241663850843906,
"learning_rate": 1.1944444444444446e-06,
"loss": -0.0087,
"num_tokens": 44667598.0,
"reward": 2.0960278511047363,
"reward_std": 0.348201185464859,
"rewards/accuracy_reward_step": 0.79296875,
"rewards/final_brier_reward_step": 0.8007601499557495,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8333513140678406,
"step": 157
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.359375,
"calib/ece": 0.2829687500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2829687500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1522.0,
"completions/max_terminated_length": 1522.0,
"completions/mean_length": 772.57421875,
"completions/mean_terminated_length": 784.8373413085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 467.0,
"epoch": 0.16853333333333334,
"grad_norm": 0.011688906699419022,
"learning_rate": 1.1666666666666668e-06,
"loss": -0.0188,
"num_tokens": 44970617.0,
"reward": 1.9274451732635498,
"reward_std": 0.3046892285346985,
"rewards/accuracy_reward_step": 0.70703125,
"rewards/final_brier_reward_step": 0.7088847160339355,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7665203809738159,
"step": 158
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5172413793103448,
"calib/avg_num_step_conf": 6.3203125,
"calib/ece": 0.3294921875000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.001034482758620947,
"calib/mean_conf": 0.9896484375000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.988965517241379,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3294921875000001,
"calib/std_conf": 0.0035730979287718623,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1249.0,
"completions/max_terminated_length": 1249.0,
"completions/mean_length": 777.06640625,
"completions/mean_terminated_length": 789.4008178710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.1696,
"grad_norm": 0.008924233727157116,
"learning_rate": 1.138888888888889e-06,
"loss": 0.0063,
"num_tokens": 45274330.0,
"reward": 1.843568205833435,
"reward_std": 0.34368807077407837,
"rewards/accuracy_reward_step": 0.66015625,
"rewards/final_brier_reward_step": 0.6674585342407227,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7536893486976624,
"step": 159
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.496875,
"calib/avg_num_step_conf": 6.12109375,
"calib/ece": 0.36239215686274506,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.0002499999999995284,
"calib/mean_conf": 0.9898431372549019,
"calib/mu_c": 0.9897500000000001,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.36239215686274506,
"calib/std_conf": 0.002499980776550469,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1343.0,
"completions/max_terminated_length": 1343.0,
"completions/mean_length": 804.47265625,
"completions/mean_terminated_length": 817.2421264648438,
"completions/min_length": 0.0,
"completions/min_terminated_length": 514.0,
"epoch": 0.17066666666666666,
"grad_norm": 0.01021601166576147,
"learning_rate": 1.111111111111111e-06,
"loss": -0.0102,
"num_tokens": 45585115.0,
"reward": 1.7612160444259644,
"reward_std": 0.4553215503692627,
"rewards/accuracy_reward_step": 0.625,
"rewards/final_brier_reward_step": 0.6283385753631592,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6899632215499878,
"step": 160
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5087719298245614,
"calib/avg_num_step_conf": 6.23046875,
"calib/ece": 0.21250000000000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.000701754385964759,
"calib/mean_conf": 0.9898437500000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9892982456140352,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.21250000000000013,
"calib/std_conf": 0.0024951124097923947,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1316.0,
"completions/max_terminated_length": 1316.0,
"completions/mean_length": 775.4921875,
"completions/mean_terminated_length": 787.8016357421875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 421.0,
"epoch": 0.17173333333333332,
"grad_norm": 0.009294044226408005,
"learning_rate": 1.0833333333333335e-06,
"loss": -0.0073,
"num_tokens": 45887561.0,
"reward": 2.063585042953491,
"reward_std": 0.2662416994571686,
"rewards/accuracy_reward_step": 0.77734375,
"rewards/final_brier_reward_step": 0.781999945640564,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8082783818244934,
"step": 161
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5089285714285714,
"calib/avg_num_step_conf": 6.26171875,
"calib/ece": 0.20859375000000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0007142857142855563,
"calib/mean_conf": 0.9898437500000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9892857142857144,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.20859375000000013,
"calib/std_conf": 0.0024951124097923947,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1628.0,
"completions/max_terminated_length": 1628.0,
"completions/mean_length": 763.7265625,
"completions/mean_terminated_length": 775.8492431640625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 362.0,
"epoch": 0.1728,
"grad_norm": 0.009800123982131481,
"learning_rate": 1.0555555555555557e-06,
"loss": -0.0024,
"num_tokens": 46187219.0,
"reward": 2.075169801712036,
"reward_std": 0.22966566681861877,
"rewards/accuracy_reward_step": 0.78125,
"rewards/final_brier_reward_step": 0.7858281135559082,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.8273511528968811,
"step": 162
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4971590909090909,
"calib/avg_num_step_conf": 6.11328125,
"calib/ece": 0.3024609375,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -5.681818181801912e-05,
"calib/mean_conf": 0.9899609375,
"calib/mu_c": 0.989943181818182,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3024609375,
"calib/std_conf": 0.0006237781024480987,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1786.0,
"completions/max_terminated_length": 1786.0,
"completions/mean_length": 793.3515625,
"completions/mean_terminated_length": 805.9445190429688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 461.0,
"epoch": 0.17386666666666667,
"grad_norm": 0.0091823386028409,
"learning_rate": 1.0277777777777777e-06,
"loss": -0.0163,
"num_tokens": 46495149.0,
"reward": 1.889510989189148,
"reward_std": 0.3669717311859131,
"rewards/accuracy_reward_step": 0.6875,
"rewards/final_brier_reward_step": 0.6935710310935974,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7472852468490601,
"step": 163
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5061728395061729,
"calib/avg_num_step_conf": 5.8515625,
"calib/ece": 0.30625000000000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0004938271604938427,
"calib/mean_conf": 0.9898437500000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.989506172839506,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30625000000000013,
"calib/std_conf": 0.0024951124097923947,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1638.0,
"completions/max_terminated_length": 1638.0,
"completions/mean_length": 848.86328125,
"completions/mean_terminated_length": 862.3373413085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 520.0,
"epoch": 0.17493333333333333,
"grad_norm": 0.009390867315232754,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.008,
"num_tokens": 46818594.0,
"reward": 1.882812261581421,
"reward_std": 0.35324978828430176,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/final_brier_reward_step": 0.6901249885559082,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7395614981651306,
"step": 164
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.0859375,
"calib/ece": 0.4392187500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.4392187500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1340.0,
"completions/max_terminated_length": 1340.0,
"completions/mean_length": 797.6953125,
"completions/mean_terminated_length": 810.357177734375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 464.0,
"epoch": 0.176,
"grad_norm": 0.011323746293783188,
"learning_rate": 9.722222222222224e-07,
"loss": -0.0105,
"num_tokens": 47128380.0,
"reward": 1.6184314489364624,
"reward_std": 0.4666936695575714,
"rewards/accuracy_reward_step": 0.55078125,
"rewards/final_brier_reward_step": 0.559587836265564,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.6172629594802856,
"step": 165
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5136986301369864,
"calib/avg_num_step_conf": 6.14453125,
"calib/ece": 0.27484375000000005,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0010958904109586998,
"calib/mean_conf": 0.9896875,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9889041095890411,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.27484375000000005,
"calib/std_conf": 0.0035216961467452053,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1693.0,
"completions/max_terminated_length": 1693.0,
"completions/mean_length": 785.94921875,
"completions/mean_terminated_length": 798.4246215820312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 457.0,
"epoch": 0.17706666666666668,
"grad_norm": 0.008984563872218132,
"learning_rate": 9.444444444444445e-07,
"loss": 0.0073,
"num_tokens": 47435767.0,
"reward": 1.9408130645751953,
"reward_std": 0.24893325567245483,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/final_brier_reward_step": 0.7166886329650879,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7809381484985352,
"step": 166
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.078125,
"calib/ece": 0.2478125000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -3.3306690738754696e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9900000000000001,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2478125000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1505.0,
"completions/max_terminated_length": 1505.0,
"completions/mean_length": 840.9609375,
"completions/mean_terminated_length": 854.3095703125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 518.0,
"epoch": 0.17813333333333334,
"grad_norm": 0.007702964823693037,
"learning_rate": 9.166666666666666e-07,
"loss": -0.019,
"num_tokens": 47756661.0,
"reward": 1.9908963441848755,
"reward_std": 0.36734330654144287,
"rewards/accuracy_reward_step": 0.7421875,
"rewards/final_brier_reward_step": 0.7432601451873779,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7828250527381897,
"step": 167
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 5.9140625,
"calib/ece": 0.22529411764705887,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.9900000000000002,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.22529411764705887,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1476.0,
"completions/max_terminated_length": 1476.0,
"completions/mean_length": 840.86328125,
"completions/mean_terminated_length": 854.2103881835938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 431.0,
"epoch": 0.1792,
"grad_norm": 0.00855457317084074,
"learning_rate": 8.88888888888889e-07,
"loss": -0.0072,
"num_tokens": 48076594.0,
"reward": 2.0264196395874023,
"reward_std": 0.34020769596099854,
"rewards/accuracy_reward_step": 0.76171875,
"rewards/final_brier_reward_step": 0.76240074634552,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7885903120040894,
"step": 168
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5028901734104047,
"calib/avg_num_step_conf": 6.03515625,
"calib/ece": 0.3142578085937502,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 5.779768786129402e-05,
"calib/mean_conf": 0.9900390585937502,
"calib/mu_c": 0.9900577976878612,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.3142578085937502,
"calib/std_conf": 0.0006237157246378521,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1404.0,
"completions/max_terminated_length": 1404.0,
"completions/mean_length": 821.57421875,
"completions/mean_terminated_length": 834.6151123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 433.0,
"epoch": 0.18026666666666666,
"grad_norm": 0.011762641370296478,
"learning_rate": 8.611111111111112e-07,
"loss": 0.0026,
"num_tokens": 48391101.0,
"reward": 1.8578014373779297,
"reward_std": 0.32131901383399963,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/final_brier_reward_step": 0.6741988658905029,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7335692644119263,
"step": 169
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.08203125,
"calib/ece": 0.2595312500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2595312500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1446.0,
"completions/max_terminated_length": 1446.0,
"completions/mean_length": 816.5703125,
"completions/mean_terminated_length": 829.5317993164062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 483.0,
"epoch": 0.18133333333333335,
"grad_norm": 0.009184346534311771,
"learning_rate": 8.333333333333333e-07,
"loss": -0.006,
"num_tokens": 48704295.0,
"reward": 1.9696009159088135,
"reward_std": 0.23746982216835022,
"rewards/accuracy_reward_step": 0.73046875,
"rewards/final_brier_reward_step": 0.7318534851074219,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7715502977371216,
"step": 170
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.19921875,
"calib/ece": 0.3806250000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3806250000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1453.0,
"completions/max_terminated_length": 1453.0,
"completions/mean_length": 794.14453125,
"completions/mean_terminated_length": 806.7500610351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 467.0,
"epoch": 0.1824,
"grad_norm": 0.010356857441365719,
"learning_rate": 8.055555555555557e-07,
"loss": 0.0062,
"num_tokens": 49014492.0,
"reward": 1.7332839965820312,
"reward_std": 0.2521524429321289,
"rewards/accuracy_reward_step": 0.609375,
"rewards/final_brier_reward_step": 0.6170874834060669,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.6597986221313477,
"step": 171
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.2890625,
"calib/ece": 0.17503937007874015,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -4.440892098500626e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.17503937007874015,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1651.0,
"completions/max_terminated_length": 1651.0,
"completions/mean_length": 791.50390625,
"completions/mean_terminated_length": 807.2709350585938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 466.0,
"epoch": 0.18346666666666667,
"grad_norm": 0.008940366096794605,
"learning_rate": 7.777777777777779e-07,
"loss": 0.0022,
"num_tokens": 49320469.0,
"reward": 2.124072551727295,
"reward_std": 0.31485238671302795,
"rewards/accuracy_reward_step": 0.80859375,
"rewards/final_brier_reward_step": 0.8121663928031921,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.8481862545013428,
"step": 172
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1875,
"calib/ece": 0.2605882352941177,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2605882352941177,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2366.0,
"completions/max_terminated_length": 2366.0,
"completions/mean_length": 822.77734375,
"completions/mean_terminated_length": 832.5336303710938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 391.0,
"epoch": 0.18453333333333333,
"grad_norm": 0.010574176907539368,
"learning_rate": 7.5e-07,
"loss": 0.0065,
"num_tokens": 49634260.0,
"reward": 1.9617128372192383,
"reward_std": 0.2606860399246216,
"rewards/accuracy_reward_step": 0.7265625,
"rewards/final_brier_reward_step": 0.7318534851074219,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7634353041648865,
"step": 173
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 5.9375,
"calib/ece": 0.34686274509803916,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9899999999999999,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.34686274509803916,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2392.0,
"completions/max_terminated_length": 2392.0,
"completions/mean_length": 833.1171875,
"completions/mean_terminated_length": 846.34130859375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 451.0,
"epoch": 0.1856,
"grad_norm": 0.014669586904346943,
"learning_rate": 7.222222222222222e-07,
"loss": -0.0094,
"num_tokens": 49951770.0,
"reward": 1.7839492559432983,
"reward_std": 0.47378671169281006,
"rewards/accuracy_reward_step": 0.640625,
"rewards/final_brier_reward_step": 0.63974529504776,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.6835517287254333,
"step": 174
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5004042288557213,
"calib/avg_num_step_conf": 6.01171875,
"calib/ece": 0.4621653543307087,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9765625,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -4.8507462686919744e-05,
"calib/mean_conf": 0.9897244094488189,
"calib/mu_c": 0.9897014925373132,
"calib/mu_w": 0.9897500000000001,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.9765625,
"calib/pce": 0.4621653543307087,
"calib/std_conf": 0.003125151121648213,
"calib/step_conf_rate": 0.9765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 2531.0,
"completions/max_terminated_length": 2531.0,
"completions/mean_length": 835.82421875,
"completions/mean_terminated_length": 852.47412109375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 431.0,
"epoch": 0.18666666666666668,
"grad_norm": 0.009562082588672638,
"learning_rate": 6.944444444444446e-07,
"loss": -0.0133,
"num_tokens": 50271565.0,
"reward": 1.5616577863693237,
"reward_std": 0.43291059136390686,
"rewards/accuracy_reward_step": 0.5234375,
"rewards/final_brier_reward_step": 0.5285648107528687,
"rewards/format_reward_step": 0.9765625,
"rewards/stepwise_brier_reward": 0.6243162155151367,
"step": 175
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5057471264367817,
"calib/avg_num_step_conf": 6.26171875,
"calib/ece": 0.33236220472440947,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.00045977011494291453,
"calib/mean_conf": 0.9898425196850394,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.989540229885057,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.33236220472440947,
"calib/std_conf": 0.0025048777512735247,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01953125,
"completions/max_length": 1715.0,
"completions/max_terminated_length": 1715.0,
"completions/mean_length": 801.9765625,
"completions/mean_terminated_length": 817.9522094726562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 448.0,
"epoch": 0.18773333333333334,
"grad_norm": 0.008803504519164562,
"learning_rate": 6.666666666666667e-07,
"loss": -0.0149,
"num_tokens": 50580935.0,
"reward": 1.8366894721984863,
"reward_std": 0.27378901839256287,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.659344494342804,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7889761924743652,
"step": 176
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1875,
"calib/ece": 0.3220312500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3220312500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1522.0,
"completions/max_terminated_length": 1522.0,
"completions/mean_length": 825.61328125,
"completions/mean_terminated_length": 838.7183227539062,
"completions/min_length": 0.0,
"completions/min_terminated_length": 484.0,
"epoch": 0.1888,
"grad_norm": 0.009638850577175617,
"learning_rate": 6.388888888888889e-07,
"loss": -0.0124,
"num_tokens": 50896124.0,
"reward": 1.841766595840454,
"reward_std": 0.21446684002876282,
"rewards/accuracy_reward_step": 0.66796875,
"rewards/final_brier_reward_step": 0.6704480648040771,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7122435569763184,
"step": 177
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5068493150684932,
"calib/avg_num_step_conf": 6.26953125,
"calib/ece": 0.2750390625000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0004109589041094708,
"calib/mean_conf": 0.9898828125000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.9895890410958903,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2750390625000001,
"calib/std_conf": 0.001871334307344296,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1559.0,
"completions/max_terminated_length": 1559.0,
"completions/mean_length": 797.8359375,
"completions/mean_terminated_length": 810.5000610351562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 513.0,
"epoch": 0.18986666666666666,
"grad_norm": 0.010374956764280796,
"learning_rate": 6.111111111111112e-07,
"loss": -0.017,
"num_tokens": 51206442.0,
"reward": 1.940459132194519,
"reward_std": 0.40489891171455383,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/final_brier_reward_step": 0.7206753492355347,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7520986795425415,
"step": 178
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.4972067039106145,
"calib/avg_num_step_conf": 6.171875,
"calib/ece": 0.29062500000000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -0.00022346368715042697,
"calib/mean_conf": 0.9898437500000001,
"calib/mu_c": 0.9897765363128492,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.29062500000000013,
"calib/std_conf": 0.0024951124097923947,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2087.0,
"completions/max_terminated_length": 2087.0,
"completions/mean_length": 836.5,
"completions/mean_terminated_length": 849.77783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 502.0,
"epoch": 0.19093333333333334,
"grad_norm": 0.01011861115694046,
"learning_rate": 5.833333333333334e-07,
"loss": 0.0039,
"num_tokens": 51526850.0,
"reward": 1.904581069946289,
"reward_std": 0.34588849544525146,
"rewards/accuracy_reward_step": 0.69921875,
"rewards/final_brier_reward_step": 0.7012190818786621,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7296052575111389,
"step": 179
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 5.9765625,
"calib/ece": 0.2517187500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2517187500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1729.0,
"completions/max_terminated_length": 1729.0,
"completions/mean_length": 885.42578125,
"completions/mean_terminated_length": 899.480224609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 556.0,
"epoch": 0.192,
"grad_norm": 0.009084006771445274,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0067,
"num_tokens": 51857375.0,
"reward": 1.994532823562622,
"reward_std": 0.30369269847869873,
"rewards/accuracy_reward_step": 0.73828125,
"rewards/final_brier_reward_step": 0.7395097613334656,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.816746711730957,
"step": 180
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.28125,
"calib/ece": 0.3064062500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.3064062500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1398.0,
"completions/max_terminated_length": 1398.0,
"completions/mean_length": 791.02734375,
"completions/mean_terminated_length": 803.5833740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 477.0,
"epoch": 0.19306666666666666,
"grad_norm": 0.009865384548902512,
"learning_rate": 5.277777777777779e-07,
"loss": -0.0028,
"num_tokens": 52166142.0,
"reward": 1.8717740774154663,
"reward_std": 0.3827122449874878,
"rewards/accuracy_reward_step": 0.68359375,
"rewards/final_brier_reward_step": 0.6897441148757935,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7036018967628479,
"step": 181
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5045725800201503,
"calib/avg_num_step_conf": 6.1875,
"calib/ece": 0.25921875000000005,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.000365806401612212,
"calib/mean_conf": 0.9896875,
"calib/mu_c": 0.9897860962566845,
"calib/mu_w": 0.9894202898550722,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.25921875000000005,
"calib/std_conf": 0.003521696146745205,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1187.0,
"completions/max_terminated_length": 1187.0,
"completions/mean_length": 813.54296875,
"completions/mean_terminated_length": 826.4564208984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 544.0,
"epoch": 0.19413333333333332,
"grad_norm": 0.009119072929024696,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0038,
"num_tokens": 52480569.0,
"reward": 1.9650317430496216,
"reward_std": 0.25256583094596863,
"rewards/accuracy_reward_step": 0.73046875,
"rewards/final_brier_reward_step": 0.732147216796875,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7529795169830322,
"step": 182
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.05078125,
"calib/ece": 0.2751562500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999998,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.2751562500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1558.0,
"completions/max_terminated_length": 1558.0,
"completions/mean_length": 856.921875,
"completions/mean_terminated_length": 870.5238647460938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 512.0,
"epoch": 0.1952,
"grad_norm": 0.00926928035914898,
"learning_rate": 4.7222222222222226e-07,
"loss": -0.0148,
"num_tokens": 52806621.0,
"reward": 1.9432084560394287,
"reward_std": 0.4737989902496338,
"rewards/accuracy_reward_step": 0.71484375,
"rewards/final_brier_reward_step": 0.7202913761138916,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7791052460670471,
"step": 183
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1640625,
"calib/ece": 0.26450980392156864,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.26450980392156864,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1405.0,
"completions/max_terminated_length": 1405.0,
"completions/mean_length": 836.9921875,
"completions/mean_terminated_length": 850.27783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 466.0,
"epoch": 0.19626666666666667,
"grad_norm": 0.00922873243689537,
"learning_rate": 4.444444444444445e-07,
"loss": -0.0194,
"num_tokens": 53126171.0,
"reward": 1.9618688821792603,
"reward_std": 0.32835304737091064,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.7280253767967224,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7913253307342529,
"step": 184
},
{
"calib/answer_extract_rate": 0.98828125,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1640625,
"calib/ece": 0.25587301587301603,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.25587301587301603,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2511.0,
"completions/max_terminated_length": 2511.0,
"completions/mean_length": 845.546875,
"completions/mean_terminated_length": 865.8400268554688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 516.0,
"epoch": 0.19733333333333333,
"grad_norm": 0.008712178096175194,
"learning_rate": 4.1666666666666667e-07,
"loss": -0.0022,
"num_tokens": 53449551.0,
"reward": 1.9483182430267334,
"reward_std": 0.34782806038856506,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.7277921438217163,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7607932090759277,
"step": 185
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.05078125,
"calib/ece": 0.30889763779527557,
"calib/final_conf_rate": 0.9921875,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 0.9921875,
"calib/nonempty_reasoning_rate": 0.9921875,
"calib/nonempty_step_conf_rate": 0.984375,
"calib/pce": 0.30889763779527557,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2501.0,
"completions/max_terminated_length": 2501.0,
"completions/mean_length": 864.60546875,
"completions/mean_terminated_length": 874.8577270507812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 462.0,
"epoch": 0.1984,
"grad_norm": 0.01143097784370184,
"learning_rate": 3.8888888888888895e-07,
"loss": -0.0059,
"num_tokens": 53775930.0,
"reward": 1.8627067804336548,
"reward_std": 0.23205628991127014,
"rewards/accuracy_reward_step": 0.67578125,
"rewards/final_brier_reward_step": 0.6818546652793884,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7455346584320068,
"step": 186
},
{
"calib/answer_extract_rate": 0.984375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.04296875,
"calib/ece": 0.29952380952380964,
"calib/final_conf_rate": 0.984375,
"calib/format_rate": 0.984375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 3.3306690738754696e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 0.984375,
"calib/nonempty_reasoning_rate": 0.98828125,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.29952380952380964,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0234375,
"completions/max_length": 2036.0,
"completions/max_terminated_length": 2036.0,
"completions/mean_length": 849.0625,
"completions/mean_terminated_length": 869.4400634765625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 479.0,
"epoch": 0.19946666666666665,
"grad_norm": 0.011029952205717564,
"learning_rate": 3.611111111111111e-07,
"loss": -0.0206,
"num_tokens": 54094834.0,
"reward": 1.8708256483078003,
"reward_std": 0.4722091555595398,
"rewards/accuracy_reward_step": 0.6796875,
"rewards/final_brier_reward_step": 0.6856827735900879,
"rewards/format_reward_step": 0.984375,
"rewards/stepwise_brier_reward": 0.7507448792457581,
"step": 187
},
{
"calib/answer_extract_rate": 0.9921875,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.078125,
"calib/ece": 0.2331372549019608,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -3.3306690738754696e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2331372549019608,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1574.0,
"completions/max_terminated_length": 1574.0,
"completions/mean_length": 858.39453125,
"completions/mean_terminated_length": 872.0198974609375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 431.0,
"epoch": 0.20053333333333334,
"grad_norm": 0.00768205476924777,
"learning_rate": 3.3333333333333335e-07,
"loss": -0.0058,
"num_tokens": 54418655.0,
"reward": 2.016286849975586,
"reward_std": 0.32633286714553833,
"rewards/accuracy_reward_step": 0.75390625,
"rewards/final_brier_reward_step": 0.7585726380348206,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7987623810768127,
"step": 188
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1875,
"calib/ece": 0.26450980392156864,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.26450980392156864,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1578.0,
"completions/max_terminated_length": 1578.0,
"completions/mean_length": 837.58203125,
"completions/mean_terminated_length": 850.8770141601562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 521.0,
"epoch": 0.2016,
"grad_norm": 0.009047552943229675,
"learning_rate": 3.055555555555556e-07,
"loss": -0.0177,
"num_tokens": 54740844.0,
"reward": 1.9537990093231201,
"reward_std": 0.3179192543029785,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.7280253767967224,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.7590456008911133,
"step": 189
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.506578947368421,
"calib/avg_num_step_conf": 6.04296875,
"calib/ece": 0.28671875000000013,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0005263157894738191,
"calib/mean_conf": 0.9898437500000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9894736842105262,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.28671875000000013,
"calib/std_conf": 0.0024951124097923947,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1488.0,
"completions/max_terminated_length": 1488.0,
"completions/mean_length": 852.01171875,
"completions/mean_terminated_length": 865.5357666015625,
"completions/min_length": 0.0,
"completions/min_terminated_length": 484.0,
"epoch": 0.20266666666666666,
"grad_norm": 0.010138653218746185,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.001,
"num_tokens": 55064567.0,
"reward": 1.9208712577819824,
"reward_std": 0.322945773601532,
"rewards/accuracy_reward_step": 0.703125,
"rewards/final_brier_reward_step": 0.7092655897140503,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.755469560623169,
"step": 190
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.23828125,
"calib/ece": 0.3415625000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.3415625000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1484.0,
"completions/max_terminated_length": 1484.0,
"completions/mean_length": 828.1328125,
"completions/mean_terminated_length": 841.27783203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 480.0,
"epoch": 0.20373333333333332,
"grad_norm": 0.009094194509088993,
"learning_rate": 2.5000000000000004e-07,
"loss": -0.0019,
"num_tokens": 55380737.0,
"reward": 1.8121626377105713,
"reward_std": 0.21085627377033234,
"rewards/accuracy_reward_step": 0.6484375,
"rewards/final_brier_reward_step": 0.6553687453269958,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.702657163143158,
"step": 191
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.20703125,
"calib/ece": 0.1892187500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -2.220446049250313e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000002,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.1892187500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1385.0,
"completions/max_terminated_length": 1385.0,
"completions/mean_length": 800.58984375,
"completions/mean_terminated_length": 813.2976684570312,
"completions/min_length": 0.0,
"completions/min_terminated_length": 433.0,
"epoch": 0.2048,
"grad_norm": 0.009250716306269169,
"learning_rate": 2.2222222222222224e-07,
"loss": 0.0208,
"num_tokens": 55690664.0,
"reward": 2.106654405593872,
"reward_std": 0.2364106923341751,
"rewards/accuracy_reward_step": 0.80078125,
"rewards/final_brier_reward_step": 0.8045878410339355,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8251546621322632,
"step": 192
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5073529411764706,
"calib/avg_num_step_conf": 6.16796875,
"calib/ece": 0.25650980392156864,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0005882352941177782,
"calib/mean_conf": 0.9898431372549019,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9894117647058822,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.25650980392156864,
"calib/std_conf": 0.0024999807765504704,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01171875,
"completions/max_length": 2518.0,
"completions/max_terminated_length": 2518.0,
"completions/mean_length": 855.9375,
"completions/mean_terminated_length": 866.0869750976562,
"completions/min_length": 0.0,
"completions/min_terminated_length": 500.0,
"epoch": 0.20586666666666667,
"grad_norm": 0.01025775820016861,
"learning_rate": 1.9444444444444447e-07,
"loss": 0.0057,
"num_tokens": 56015496.0,
"reward": 1.96535325050354,
"reward_std": 0.19145634770393372,
"rewards/accuracy_reward_step": 0.73046875,
"rewards/final_brier_reward_step": 0.7359069585800171,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7583186626434326,
"step": 193
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.26953125,
"calib/ece": 0.2673437500000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 3.3306690738754696e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999997,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.2673437500000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1313.0,
"completions/max_terminated_length": 1313.0,
"completions/mean_length": 824.7734375,
"completions/mean_terminated_length": 837.8651123046875,
"completions/min_length": 0.0,
"completions/min_terminated_length": 500.0,
"epoch": 0.20693333333333333,
"grad_norm": 0.00920251477509737,
"learning_rate": 1.6666666666666668e-07,
"loss": -0.0116,
"num_tokens": 56332582.0,
"reward": 1.9541829824447632,
"reward_std": 0.23327034711837769,
"rewards/accuracy_reward_step": 0.72265625,
"rewards/final_brier_reward_step": 0.7279476523399353,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7684719562530518,
"step": 194
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5168539325842696,
"calib/avg_num_step_conf": 6.0703125,
"calib/ece": 0.3349739583333333,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.9921875,
"calib/gap": 0.007715355805243296,
"calib/mean_conf": 0.9873177083333334,
"calib/mu_c": 0.9899999999999999,
"calib/mu_w": 0.9822846441947566,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.3349739583333333,
"calib/std_conf": 0.028562366562264282,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1432.0,
"completions/max_terminated_length": 1432.0,
"completions/mean_length": 839.01171875,
"completions/mean_terminated_length": 852.3294067382812,
"completions/min_length": 0.0,
"completions/min_terminated_length": 477.0,
"epoch": 0.208,
"grad_norm": 0.009040816687047482,
"learning_rate": 1.3888888888888888e-07,
"loss": -0.0006,
"num_tokens": 56653353.0,
"reward": 1.8100706338882446,
"reward_std": 0.21309393644332886,
"rewards/accuracy_reward_step": 0.65234375,
"rewards/final_brier_reward_step": 0.6631484627723694,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.6865090727806091,
"step": 195
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5130681818181818,
"calib/avg_num_step_conf": 6.55078125,
"calib/ece": 0.30171875000000004,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 1.0,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 0.0010454545454545716,
"calib/mean_conf": 0.98921875,
"calib/mu_c": 0.9895454545454547,
"calib/mu_w": 0.9885000000000002,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.30171875000000004,
"calib/std_conf": 0.005535309244974489,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1174.0,
"completions/max_terminated_length": 1174.0,
"completions/mean_length": 780.6796875,
"completions/mean_terminated_length": 793.0714721679688,
"completions/min_length": 0.0,
"completions/min_terminated_length": 550.0,
"epoch": 0.20906666666666668,
"grad_norm": 0.011479136534035206,
"learning_rate": 1.1111111111111112e-07,
"loss": -0.0029,
"num_tokens": 56955751.0,
"reward": 1.884519100189209,
"reward_std": 0.19984768331050873,
"rewards/accuracy_reward_step": 0.6875,
"rewards/final_brier_reward_step": 0.6945406198501587,
"rewards/format_reward_step": 1.0,
"rewards/stepwise_brier_reward": 0.7185356616973877,
"step": 196
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1953125,
"calib/ece": 0.39,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 2.220446049250313e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9899999999999998,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.99609375,
"calib/pce": 0.39,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 0.99609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1401.0,
"completions/max_terminated_length": 1401.0,
"completions/mean_length": 834.015625,
"completions/mean_terminated_length": 847.2540283203125,
"completions/min_length": 0.0,
"completions/min_terminated_length": 522.0,
"epoch": 0.21013333333333334,
"grad_norm": 0.008359176106750965,
"learning_rate": 8.333333333333334e-08,
"loss": 0.0076,
"num_tokens": 57274315.0,
"reward": 1.7043508291244507,
"reward_std": 0.3267871141433716,
"rewards/accuracy_reward_step": 0.59765625,
"rewards/final_brier_reward_step": 0.6054476499557495,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.6416432857513428,
"step": 197
},
{
"calib/answer_extract_rate": 1.0,
"calib/auroc": 0.5066666666666666,
"calib/avg_num_step_conf": 6.3125,
"calib/ece": 0.28257378472187494,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.98828125,
"calib/frac_conf_gt_0.9": 0.99609375,
"calib/gap": 0.0013481481493334835,
"calib/mean_conf": 0.989605034721875,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9886518518506665,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 0.98828125,
"calib/pce": 0.28257378472187494,
"calib/std_conf": 0.006307089708075461,
"calib/step_conf_rate": 0.98828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1326.0,
"completions/max_terminated_length": 1326.0,
"completions/mean_length": 809.23828125,
"completions/mean_terminated_length": 822.0833740234375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 501.0,
"epoch": 0.2112,
"grad_norm": 0.00892886146903038,
"learning_rate": 5.555555555555556e-08,
"loss": 0.0056,
"num_tokens": 57586864.0,
"reward": 1.9245802164077759,
"reward_std": 0.28633540868759155,
"rewards/accuracy_reward_step": 0.70703125,
"rewards/final_brier_reward_step": 0.7056432962417603,
"rewards/format_reward_step": 0.98828125,
"rewards/stepwise_brier_reward": 0.7739272117614746,
"step": 198
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1953125,
"calib/ece": 0.2331372549019608,
"calib/final_conf_rate": 0.99609375,
"calib/format_rate": 0.99609375,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": -3.3306690738754696e-16,
"calib/mean_conf": 0.99,
"calib/mu_c": 0.99,
"calib/mu_w": 0.9900000000000003,
"calib/nonempty_final_conf_rate": 0.99609375,
"calib/nonempty_reasoning_rate": 1.0,
"calib/nonempty_step_conf_rate": 1.0,
"calib/pce": 0.2331372549019608,
"calib/std_conf": 0.0,
"calib/step_conf_rate": 1.0,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 2600.0,
"completions/max_terminated_length": 2600.0,
"completions/mean_length": 849.35546875,
"completions/mean_terminated_length": 862.8373413085938,
"completions/min_length": 0.0,
"completions/min_terminated_length": 524.0,
"epoch": 0.21226666666666666,
"grad_norm": 0.00927034579217434,
"learning_rate": 2.777777777777778e-08,
"loss": 0.0016,
"num_tokens": 57908499.0,
"reward": 2.0206198692321777,
"reward_std": 0.184719055891037,
"rewards/accuracy_reward_step": 0.75390625,
"rewards/final_brier_reward_step": 0.7586503624916077,
"rewards/format_reward_step": 0.99609375,
"rewards/stepwise_brier_reward": 0.8082042336463928,
"step": 199
},
{
"calib/answer_extract_rate": 0.99609375,
"calib/auroc": 0.5,
"calib/avg_num_step_conf": 6.1328125,
"calib/ece": 0.3025000000000001,
"calib/final_conf_rate": 1.0,
"calib/format_rate": 0.9921875,
"calib/frac_conf_gt_0.9": 1.0,
"calib/gap": 1.1102230246251565e-16,
"calib/mean_conf": 0.9900000000000001,
"calib/mu_c": 0.9900000000000001,
"calib/mu_w": 0.99,
"calib/nonempty_final_conf_rate": 1.0,
"calib/nonempty_reasoning_rate": 0.99609375,
"calib/nonempty_step_conf_rate": 0.9921875,
"calib/pce": 0.3025000000000001,
"calib/std_conf": 1.1102230246251565e-16,
"calib/step_conf_rate": 0.9921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1509.0,
"completions/max_terminated_length": 1509.0,
"completions/mean_length": 849.71875,
"completions/mean_terminated_length": 863.2064208984375,
"completions/min_length": 0.0,
"completions/min_terminated_length": 507.0,
"epoch": 0.21333333333333335,
"grad_norm": 0.008647196926176548,
"learning_rate": 0.0,
"loss": -0.0046,
"num_tokens": 58234075.0,
"reward": 1.8795125484466553,
"reward_std": 0.23101741075515747,
"rewards/accuracy_reward_step": 0.6875,
"rewards/final_brier_reward_step": 0.6934944987297058,
"rewards/format_reward_step": 0.9921875,
"rewards/stepwise_brier_reward": 0.7151811122894287,
"step": 200
},
{
"epoch": 0.21333333333333335,
"step": 200,
"total_flos": 0.0,
"train_loss": -0.012948631015606225,
"train_runtime": 11330.6406,
"train_samples_per_second": 4.519,
"train_steps_per_second": 0.018
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 58234075,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}